The goal of this project is to use statistical matching methods to find a subset of Beta clients that is representative of Release. The specific use case of this proof of concept is matching on the performance, configuration, and environment covariates of the clients. Validation of the matching is performed on a hold-out set of Firefox user engagement covariates.
## Loading the training dataset
#load("~/GitHub/ff-beta-release-matching/poc/EDA/data_milestone2_df_train_validate_20191025.RData")
load('~/ff-beta-release-matching/poc/matchIt/feature_selection.RData')

| rows | columns | discrete_columns | continuous_columns | all_missing_columns | total_missing_values | complete_rows | total_observations | memory_usage |
|---|---|---|---|---|---|---|---|---|
| 302819 | 95 | 7 | 88 | 0 | 0 | 302819 | 28767805 | 185350016 |
# Split the encoded training data on label_beta: rows with label_beta == 1 go
# to df_beta, rows with label_beta == 0 go to df_rel.
df_beta <- filter(df_train_encoder, label_beta == 1)
df_rel <- filter(df_train_encoder, label_beta == 0)
# sampling for beta overrepresentation
n_beta <- nrow(df_beta)

#' Build a beta/release mixture with a chosen beta over-representation.
#'
#' Draws `round(n_beta / multiple)` rows at random from `df_rel` and appends
#' every row of `df_beta`, so beta rows outnumber the sampled release rows by
#' roughly `multiple` to one.
#'
#' @param multiple A single positive number; the release sample size is
#'   `n_beta` divided by this value (rounded).
#' @return The sampled release rows followed by all beta rows.
build_df <- function(multiple) {
  # Fail early with a clear message instead of an obscure sampling error.
  stopifnot(is.numeric(multiple), length(multiple) == 1L, multiple > 0)
  # Reads df_rel, df_beta and n_beta from the enclosing environment.
  # NOTE(review): sample_n() is random and no seed is set in the visible code.
  df_rel %>%
    sample_n(size = round(n_beta / multiple)) %>%
    rbind(df_beta)
}
# Three mixtures: all beta rows plus a release sample of n_beta, n_beta / 2
# and n_beta / 4 rows respectively.
df_1x <- build_df(multiple = 1)
df_2x <- build_df(multiple = 2)
df_4x <- build_df(multiple = 4)
# downsampling
# Downsample each beta/release mixture to the same fixed number of rows.
# NOTE(review): sample_n() draws at random and no seed is set in the visible
# code, so the sampled rows differ between runs - confirm that is intended.
n_downsample <- 70000
df_1x_sm <- df_1x %>% sample_n(size = n_downsample)
df_2x_sm <- df_2x %>% sample_n(size = n_downsample)
df_4x_sm <- df_4x %>% sample_n(size = n_downsample)

# Covariate group: engagement columns.
engagement <- c(
  "active_hours", "active_hours_max",
  "uri_count", "uri_count_max",
  "search_count", "search_count_max",
  "num_pages", "num_pages_max",
  "daily_max_tabs", "daily_max_tabs_max",
  "daily_unique_domains", "daily_unique_domains_max",
  "daily_tabs_opened", "daily_tabs_opened_max"
)
# Covariate groups: each vector lists the column names belonging to one group.
usage <- c(
  "num_active_days",
  "daily_num_sessions_started", "daily_num_sessions_started_max",
  "session_length", "session_length_max",
  "profile_age"
)
environment <- c(
  "cpu_cores", "cpu_speed_mhz",
  "cpu_vendor_AMD", "cpu_vendor_Intel", "cpu_vendor_Other",
  "cpu_l2_cache_kb",
  "cpu_l2_cache_kb_cat_l1024", "cpu_l2_cache_kb_cat_l256",
  "cpu_l2_cache_kb_cat_l512", "cpu_l2_cache_kb_cat_g1024",
  "memory_mb", "is_wow64_True",
  "distro_id_norm_acer", "distro_id_norm_Mozilla",
  "distro_id_norm_other", "distro_id_norm_Yahoo",
  "install_year"
)
geo <- c(
  "country_US", "timezone_offset",
  "timezone_cat_m12_m10", "timezone_cat_m10_m8", "timezone_cat_m8_m6",
  "timezone_cat_m6_m4", "timezone_cat_m4_m2", "timezone_cat_m2_0",
  "timezone_cat_0_2", "timezone_cat_2_4", "timezone_cat_4_6",
  "timezone_cat_6_8", "timezone_cat_8_10", "timezone_cat_10_12",
  "timezone_cat_12_14",
  "locale_enGB", "locale_enUS"
)
settings <- c(
  "num_bookmarks", "num_addons",
  "sync_configured_True", "fxa_configured_True", "is_default_browser_True",
  "default_search_engine_Bing", "default_search_engine_DuckDuckGo",
  "default_search_engine_Google", "default_search_engine_other_bundled",
  "default_search_engine_other_nonbundled", "default_search_engine_Yahoo"
)
page_load <- c(
  "FX_PAGE_LOAD_MS_2_PARENT",
  "TIME_TO_DOM_COMPLETE_MS",
  "TIME_TO_DOM_CONTENT_LOADED_END_MS",
  "TIME_TO_LOAD_EVENT_END_MS",
  "TIME_TO_DOM_INTERACTIVE_MS",
  "TIME_TO_NON_BLANK_PAINT_MS"
)
startup <- c("startup_ms", "startup_ms_max")
stability <- c('content_crashes')
The Boruta method was performed.
Groups of models to train.
# Covariate sets for the candidate models; each vector lists the columns
# used by one model.
exp_1 <- c(
  "daily_num_sessions_started", "daily_num_sessions_started_max",
  "FX_PAGE_LOAD_MS_2_PARENT",
  "fxa_configured_True",
  "memory_mb",
  "num_active_days", "num_addons", "num_bookmarks",
  "profile_age",
  "session_length", "session_length_max",
  "TIME_TO_DOM_COMPLETE_MS",
  "TIME_TO_DOM_CONTENT_LOADED_END_MS",
  "TIME_TO_DOM_INTERACTIVE_MS",
  "TIME_TO_LOAD_EVENT_END_MS",
  "TIME_TO_NON_BLANK_PAINT_MS",
  "timezone_cat_0_2"
)
exp_2 <- c(
  "country_US",
  "daily_num_sessions_started", "daily_num_sessions_started_max",
  "default_search_engine_other_nonbundled",
  "FX_PAGE_LOAD_MS_2_PARENT",
  "fxa_configured_True",
  "memory_mb",
  "num_active_days", "num_addons", "num_bookmarks",
  "profile_age",
  "session_length", "session_length_max",
  "startup_ms", "startup_ms_max",
  "sync_configured_True",
  "TIME_TO_DOM_COMPLETE_MS",
  "TIME_TO_DOM_CONTENT_LOADED_END_MS",
  "TIME_TO_DOM_INTERACTIVE_MS",
  "TIME_TO_LOAD_EVENT_END_MS",
  "TIME_TO_NON_BLANK_PAINT_MS",
  "timezone_cat_0_2"
)
exp_3 <- c(
  "daily_num_sessions_started", "daily_num_sessions_started_max",
  "FX_PAGE_LOAD_MS_2_PARENT",
  "memory_mb",
  "num_active_days", "num_addons", "num_bookmarks",
  "profile_age",
  "session_length", "session_length_max",
  "TIME_TO_DOM_COMPLETE_MS",
  "TIME_TO_DOM_CONTENT_LOADED_END_MS",
  "TIME_TO_DOM_INTERACTIVE_MS",
  "TIME_TO_LOAD_EVENT_END_MS",
  "TIME_TO_NON_BLANK_PAINT_MS"
)
exp_4 <- c('cpu_speed_mhz', 'daily_num_sessions_started', 'daily_num_sessions_started_max', 'default_search_engine_other_nonbundled', 'FX_PAGE_LOAD_MS_2_PARENT', 'memory_mb', 'num_active_days', 'num_addons', 'num_bookmarks', 'profile_age','session_length','session_length_max','startup_ms','startup_ms_max','TIME_TO_DOM_COMPLETE_MS','TIME_TO_DOM_CONTENT_LOADED_END_MS','TIME_TO_DOM_INTERACTIVE_MS','TIME_TO_LOAD_EVENT_END_MS','TIME_TO_NON_BLANK_PAINT_MS')
Before propensity scores are calculated, it is good practice to determine whether the two groups are balanced.
The standardized difference can be used to compare the mean of continuous and binary variables between treatment groups. The standardized difference compares the difference in means in units of the pooled standard deviation and allows for the comparison of the relative balance of variables measured in different units. For a covariate with group means \(\bar{x}_{1}\), \(\bar{x}_{0}\) and group variances \(s_{1}^{2}\), \(s_{0}^{2}\), it is computed below as a percentage: \(d = 100 \times (\bar{x}_{1} - \bar{x}_{0}) / \sqrt{(s_{1}^{2} + s_{0}^{2})/2}\). Although there is no universally agreed-upon criterion as to what threshold of the standardized difference can be used to indicate an important imbalance, a standardized difference that is less than 10% (or \(0.1\)) has been taken to indicate a negligible difference in the mean or prevalence of a covariate between treatment groups.
# Standardized difference (in percent) between the label_beta == 1 rows and
# the remaining rows of the 1x sample, computed per covariate as
#   100 * (mean_1 - mean_0) / sqrt((var_1 + var_0) / 2).
cov <- df_1x_sm %>%
  dplyr::select(c(engagement, usage, environment, geo, settings, page_load, startup, stability))
treated <- (df_1x_sm$label_beta == 1)
# vapply() walks the columns directly and guarantees exactly one numeric value
# per covariate; apply() would first coerce the whole data frame to a matrix.
std.diff <- vapply(
  cov,
  function(x) {
    pooled_sd <- sqrt(0.5 * (var(x[treated]) + var(x[!treated])))
    100 * (mean(x[treated]) - mean(x[!treated])) / pooled_sd
  },
  numeric(1)
)
# sort() drops NA/NaN entries by default, so a covariate whose difference is
# undefined (e.g. zero variance in both groups) is absent from the result.
cov_df_1x <- sort(abs(std.diff))
cov_df_1x
## timezone_cat_10_12 timezone_cat_m10_m8
## 0.002020219 0.056792827
## cpu_l2_cache_kb_cat_l512 num_pages_max
## 0.062301633 0.642185865
## num_pages profile_age
## 0.665851178 0.752899555
## uri_count timezone_cat_12_14
## 1.180585212 1.511339809
## active_hours uri_count_max
## 1.708558043 1.788244618
## search_count cpu_vendor_Intel
## 1.884701878 1.966273619
## active_hours_max daily_unique_domains
## 2.006072257 2.026512092
## default_search_engine_other_bundled daily_unique_domains_max
## 2.085133034 2.130201241
## timezone_cat_m4_m2 cpu_vendor_AMD
## 2.134605572 2.240685938
## default_search_engine_Bing search_count_max
## 2.366116908 2.518734566
## timezone_cat_m12_m10 timezone_cat_8_10
## 2.809454175 3.028076612
## cpu_l2_cache_kb_cat_l1024 install_year
## 3.151102734 3.592303876
## is_default_browser_True cpu_speed_mhz
## 4.001308502 4.038329299
## cpu_l2_cache_kb_cat_g1024 cpu_vendor_Other
## 4.143739359 4.408163238
## default_search_engine_DuckDuckGo cpu_l2_cache_kb_cat_l256
## 4.730245644 5.111454838
## memory_mb cpu_l2_cache_kb
## 5.192940909 5.234831861
## startup_ms_max default_search_engine_Yahoo
## 5.291039912 5.509467253
## startup_ms num_bookmarks
## 6.334446306 7.632044821
## timezone_cat_m2_0 num_active_days
## 8.692969595 10.105748395
## distro_id_norm_Yahoo cpu_cores
## 10.284820766 10.338602979
## default_search_engine_Google daily_tabs_opened_max
## 10.398060403 10.433509363
## daily_tabs_opened timezone_cat_m8_m6
## 11.863234141 11.962510809
## daily_max_tabs_max distro_id_norm_other
## 13.360409666 13.787252314
## daily_max_tabs default_search_engine_other_nonbundled
## 14.278625643 14.410219870
## distro_id_norm_acer sync_configured_True
## 14.922094859 16.222845661
## fxa_configured_True daily_num_sessions_started
## 16.586189555 16.753894230
## timezone_cat_6_8 timezone_cat_2_4
## 16.996481757 17.051202721
## session_length_max daily_num_sessions_started_max
## 17.420088072 17.685073795
## TIME_TO_DOM_CONTENT_LOADED_END_MS TIME_TO_NON_BLANK_PAINT_MS
## 18.160434079 20.548754976
## locale_enUS locale_enGB
## 22.568505667 22.568505667
## distro_id_norm_Mozilla FX_PAGE_LOAD_MS_2_PARENT
## 22.964152468 23.612576292
## session_length timezone_cat_4_6
## 24.543416548 26.359352608
## timezone_cat_0_2 TIME_TO_DOM_INTERACTIVE_MS
## 29.167095740 30.451816998
## TIME_TO_DOM_COMPLETE_MS timezone_cat_m6_m4
## 30.536471121 31.961826964
## TIME_TO_LOAD_EVENT_END_MS country_US
## 33.288781331 33.511743399
## timezone_offset is_wow64_True
## 47.359904817 56.337269760
## num_addons
## 77.090320013
# Standardized difference (in percent) between the label_beta == 1 rows and
# the remaining rows of the 2x sample, computed per covariate as
#   100 * (mean_1 - mean_0) / sqrt((var_1 + var_0) / 2).
cov <- df_2x_sm %>%
  dplyr::select(c(engagement, usage, environment, geo, settings, page_load, startup, stability))
treated <- (df_2x_sm$label_beta == 1)
# vapply() walks the columns directly and guarantees exactly one numeric value
# per covariate; apply() would first coerce the whole data frame to a matrix.
std.diff <- vapply(
  cov,
  function(x) {
    pooled_sd <- sqrt(0.5 * (var(x[treated]) + var(x[!treated])))
    100 * (mean(x[treated]) - mean(x[!treated])) / pooled_sd
  },
  numeric(1)
)
# sort() drops NA/NaN entries by default, so a covariate whose difference is
# undefined (e.g. zero variance in both groups) is absent from the result.
cov_df_2x <- sort(abs(std.diff))
cov_df_2x
## num_pages_max profile_age
## 0.1780093 0.1870186
## num_pages cpu_l2_cache_kb_cat_l512
## 0.2074965 0.5048959
## search_count timezone_cat_10_12
## 0.7054902 0.9139527
## search_count_max daily_unique_domains_max
## 1.1980407 1.6358132
## timezone_cat_12_14 cpu_vendor_Intel
## 1.7338847 1.7744563
## daily_unique_domains cpu_vendor_AMD
## 1.8220546 1.9929355
## timezone_cat_m4_m2 cpu_l2_cache_kb_cat_l1024
## 2.1562510 2.2588314
## timezone_cat_m10_m8 timezone_cat_m12_m10
## 2.2686774 2.3766650
## uri_count default_search_engine_other_bundled
## 2.5821669 2.6371191
## timezone_cat_8_10 active_hours
## 2.7849004 3.0011758
## uri_count_max default_search_engine_Bing
## 3.0425253 3.3087244
## active_hours_max cpu_vendor_Other
## 3.5234456 3.5611403
## install_year is_default_browser_True
## 3.5748199 3.8511861
## cpu_speed_mhz cpu_l2_cache_kb_cat_l256
## 4.9043723 5.0541682
## cpu_l2_cache_kb_cat_g1024 startup_ms_max
## 5.2079448 5.3747128
## cpu_l2_cache_kb memory_mb
## 5.7860852 5.8859082
## default_search_engine_DuckDuckGo default_search_engine_Yahoo
## 6.0601589 6.1346705
## startup_ms timezone_cat_m2_0
## 6.7214823 7.9374573
## num_bookmarks daily_tabs_opened_max
## 8.5097075 8.6724245
## distro_id_norm_Yahoo daily_tabs_opened
## 9.2829201 10.5302528
## num_active_days default_search_engine_Google
## 10.6051678 10.6316415
## timezone_cat_m8_m6 daily_max_tabs_max
## 10.8641293 12.0406485
## cpu_cores daily_max_tabs
## 12.1931725 12.6614157
## distro_id_norm_other default_search_engine_other_nonbundled
## 14.8398878 14.9010701
## sync_configured_True distro_id_norm_acer
## 15.0311572 15.0406342
## fxa_configured_True timezone_cat_2_4
## 15.0578707 16.5569791
## session_length_max timezone_cat_6_8
## 17.0544888 17.8792833
## daily_num_sessions_started TIME_TO_DOM_CONTENT_LOADED_END_MS
## 18.2190489 18.5965690
## daily_num_sessions_started_max locale_enUS
## 18.9175991 21.2442500
## locale_enGB TIME_TO_NON_BLANK_PAINT_MS
## 21.2442500 22.2353914
## distro_id_norm_Mozilla session_length
## 23.2744249 23.6425689
## FX_PAGE_LOAD_MS_2_PARENT timezone_cat_4_6
## 24.6754281 26.7065422
## timezone_cat_0_2 TIME_TO_DOM_INTERACTIVE_MS
## 29.7954106 30.3508989
## TIME_TO_DOM_COMPLETE_MS timezone_cat_m6_m4
## 30.5367654 33.3323297
## TIME_TO_LOAD_EVENT_END_MS country_US
## 33.3626527 34.3889493
## timezone_offset is_wow64_True
## 47.7396020 57.6239622
## num_addons
## 78.8247568
# Standardized difference (in percent) between the label_beta == 1 rows and
# the remaining rows of the 4x sample, computed per covariate as
#   100 * (mean_1 - mean_0) / sqrt((var_1 + var_0) / 2).
cov <- df_4x_sm %>%
  dplyr::select(c(engagement, usage, environment, geo, settings, page_load, startup, stability))
treated <- (df_4x_sm$label_beta == 1)
# vapply() walks the columns directly and guarantees exactly one numeric value
# per covariate; apply() would first coerce the whole data frame to a matrix.
std.diff <- vapply(
  cov,
  function(x) {
    pooled_sd <- sqrt(0.5 * (var(x[treated]) + var(x[!treated])))
    100 * (mean(x[treated]) - mean(x[!treated])) / pooled_sd
  },
  numeric(1)
)
# sort() drops NA/NaN entries by default, so a covariate whose difference is
# undefined (e.g. zero variance in both groups) is absent from the result.
cov_df_4x <- sort(abs(std.diff))
cov_df_4x
## search_count num_pages
## 0.2611103 0.4552318
## num_pages_max cpu_l2_cache_kb_cat_l512
## 0.4877402 0.5792728
## timezone_cat_m10_m8 search_count_max
## 0.8115738 1.1029769
## timezone_cat_10_12 profile_age
## 1.1192069 1.2774286
## uri_count_max timezone_cat_12_14
## 1.5620770 1.6906859
## uri_count cpu_vendor_Intel
## 1.7144621 2.0810512
## daily_unique_domains cpu_vendor_AMD
## 2.2466256 2.3030637
## daily_unique_domains_max default_search_engine_other_bundled
## 2.5590968 2.6635805
## timezone_cat_m4_m2 active_hours
## 2.9518260 3.0953197
## timezone_cat_m12_m10 active_hours_max
## 3.2091756 3.2930858
## is_default_browser_True cpu_vendor_Other
## 3.4986976 3.6492225
## cpu_l2_cache_kb_cat_l1024 install_year
## 3.9572564 4.5860672
## cpu_speed_mhz cpu_l2_cache_kb_cat_g1024
## 4.7776634 4.9072444
## startup_ms_max default_search_engine_Yahoo
## 4.9080849 5.0707447
## timezone_cat_8_10 default_search_engine_Bing
## 5.2823211 5.4118978
## memory_mb cpu_l2_cache_kb_cat_l256
## 5.5169692 5.9071723
## default_search_engine_DuckDuckGo cpu_l2_cache_kb
## 6.0286458 6.1564120
## startup_ms timezone_cat_m2_0
## 6.4499269 7.4002232
## num_bookmarks num_active_days
## 7.6129359 9.0400988
## default_search_engine_Google distro_id_norm_Yahoo
## 9.9662155 10.1267428
## daily_tabs_opened_max timezone_cat_m8_m6
## 10.2122122 11.0320170
## daily_tabs_opened cpu_cores
## 11.5441202 11.8970722
## default_search_engine_other_nonbundled daily_max_tabs_max
## 13.2803908 13.3467585
## distro_id_norm_other daily_max_tabs
## 13.7806560 13.8711494
## sync_configured_True fxa_configured_True
## 13.9381008 14.0668417
## distro_id_norm_acer timezone_cat_2_4
## 14.9424206 16.2433370
## timezone_cat_6_8 TIME_TO_DOM_CONTENT_LOADED_END_MS
## 17.0882473 18.2915328
## session_length_max daily_num_sessions_started
## 18.5277354 19.4230697
## daily_num_sessions_started_max TIME_TO_NON_BLANK_PAINT_MS
## 19.7753789 20.9533451
## locale_enUS locale_enGB
## 21.0365191 21.0365191
## distro_id_norm_Mozilla FX_PAGE_LOAD_MS_2_PARENT
## 22.9014136 24.9792322
## session_length timezone_cat_4_6
## 25.2290687 26.6260556
## TIME_TO_DOM_INTERACTIVE_MS timezone_cat_0_2
## 29.5229531 30.2842292
## TIME_TO_DOM_COMPLETE_MS timezone_cat_m6_m4
## 31.1201069 33.2968057
## TIME_TO_LOAD_EVENT_END_MS country_US
## 33.4957548 34.4629902
## timezone_offset is_wow64_True
## 48.0036668 55.8822584
## num_addons
## 80.0570732
timezone_cat_10_12, timezone_cat_m10_m8, cpu_l2_cache_kb_cat_l512 variables). These significant imbalances highlight the need for an effective matching strategy to create a release group that more closely resembles the beta group. Variables that create imbalance should be included in the selection model:
Packages such as MatchIt estimate propensity scores using logistic regression as the default option. However, when estimating propensity scores using the default option, the fit of the model cannot be assessed. Therefore, it is recommended that a logistic regression be run separately to determine the model fit.
# Fit one binomial GLM per covariate set in `exps` on the 1x sample, print its
# summary, and keep the fit in `ps_1x` under the covariate set's name.
# The loop variable stays named `exp` because it is part of the call recorded
# in (and printed with) every fitted model.
ps_1x <- list()
for (exp in names(exps)) {
  ps <- glm(
    formula = generate_formula(exps[[exp]], label),
    family = binomial(),
    data = df_1x_sm
  )
  print(summary(ps))
  ps_1x[[exp]] <- ps
}
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## glm(formula = generate_formula(exps[[exp]], label), family = binomial(),
## data = df_1x_sm)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.1651 -0.9590 -0.0003 0.8511 6.2966
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.072e+00 4.720e-02 65.087 < 2e-16 ***
## daily_num_sessions_started 2.900e-04 8.552e-03 0.034 0.97295
## daily_num_sessions_started_max 1.248e-02 4.895e-03 2.549 0.01080 *
## FX_PAGE_LOAD_MS_2_PARENT 1.721e-05 9.422e-06 1.826 0.06781 .
## fxa_configured_True 1.037e+00 2.675e-02 38.772 < 2e-16 ***
## memory_mb 1.462e-05 1.299e-06 11.256 < 2e-16 ***
## num_active_days 9.537e-02 4.564e-03 20.897 < 2e-16 ***
## num_addons -5.323e-01 5.642e-03 -94.343 < 2e-16 ***
## num_bookmarks 4.940e-05 1.020e-05 4.841 1.29e-06 ***
## profile_age 7.685e-05 1.168e-05 6.581 4.68e-11 ***
## session_length -2.076e-02 1.661e-03 -12.497 < 2e-16 ***
## session_length_max 3.599e-03 7.427e-04 4.846 1.26e-06 ***
## TIME_TO_DOM_COMPLETE_MS 1.837e-05 1.333e-05 1.378 0.16823
## TIME_TO_DOM_CONTENT_LOADED_END_MS 1.650e-05 5.737e-06 2.876 0.00402 **
## TIME_TO_DOM_INTERACTIVE_MS -2.766e-05 1.389e-05 -1.991 0.04644 *
## TIME_TO_LOAD_EVENT_END_MS -1.301e-04 1.423e-05 -9.142 < 2e-16 ***
## TIME_TO_NON_BLANK_PAINT_MS 4.308e-05 8.738e-06 4.931 8.20e-07 ***
## timezone_cat_0_2 -7.612e-01 2.178e-02 -34.950 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 97041 on 69999 degrees of freedom
## Residual deviance: 76920 on 69982 degrees of freedom
## AIC: 76956
##
## Number of Fisher Scoring iterations: 5
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## glm(formula = generate_formula(exps[[exp]], label), family = binomial(),
## data = df_1x_sm)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.6482 -0.9160 -0.0002 0.8239 6.4582
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.502e+00 7.117e-02 35.152 < 2e-16
## country_US 8.436e-01 5.432e-02 15.531 < 2e-16
## daily_num_sessions_started 3.339e-03 8.705e-03 0.384 0.701336
## daily_num_sessions_started_max 1.118e-02 4.981e-03 2.244 0.024820
## default_search_engine_other_nonbundled 1.426e+00 3.655e-02 39.019 < 2e-16
## FX_PAGE_LOAD_MS_2_PARENT 1.314e-05 9.620e-06 1.366 0.172036
## fxa_configured_True 8.924e-02 4.760e-02 1.875 0.060850
## memory_mb 1.691e-05 1.338e-06 12.638 < 2e-16
## num_active_days 9.260e-02 4.656e-03 19.890 < 2e-16
## num_addons -5.941e-01 6.030e-03 -98.524 < 2e-16
## num_bookmarks 5.639e-05 1.048e-05 5.381 7.39e-08
## profile_age 7.293e-05 1.189e-05 6.134 8.57e-10
## session_length -1.849e-02 1.661e-03 -11.129 < 2e-16
## session_length_max 2.801e-03 7.411e-04 3.779 0.000157
## startup_ms -7.698e-07 2.994e-07 -2.571 0.010126
## startup_ms_max 5.029e-08 5.258e-08 0.957 0.338795
## sync_configured_True 1.136e+00 4.777e-02 23.771 < 2e-16
## TIME_TO_DOM_COMPLETE_MS -1.598e-05 1.369e-05 -1.168 0.242984
## TIME_TO_DOM_CONTENT_LOADED_END_MS 7.629e-06 5.762e-06 1.324 0.185523
## TIME_TO_DOM_INTERACTIVE_MS -2.320e-05 1.412e-05 -1.643 0.100407
## TIME_TO_LOAD_EVENT_END_MS -8.904e-05 1.453e-05 -6.127 8.98e-10
## TIME_TO_NON_BLANK_PAINT_MS 4.521e-05 8.898e-06 5.081 3.75e-07
## timezone_cat_0_2 3.276e-02 5.515e-02 0.594 0.552565
##
## (Intercept) ***
## country_US ***
## daily_num_sessions_started
## daily_num_sessions_started_max *
## default_search_engine_other_nonbundled ***
## FX_PAGE_LOAD_MS_2_PARENT
## fxa_configured_True .
## memory_mb ***
## num_active_days ***
## num_addons ***
## num_bookmarks ***
## profile_age ***
## session_length ***
## session_length_max ***
## startup_ms *
## startup_ms_max
## sync_configured_True ***
## TIME_TO_DOM_COMPLETE_MS
## TIME_TO_DOM_CONTENT_LOADED_END_MS
## TIME_TO_DOM_INTERACTIVE_MS
## TIME_TO_LOAD_EVENT_END_MS ***
## TIME_TO_NON_BLANK_PAINT_MS ***
## timezone_cat_0_2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 97041 on 69999 degrees of freedom
## Residual deviance: 74394 on 69977 degrees of freedom
## AIC: 74440
##
## Number of Fisher Scoring iterations: 6
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## glm(formula = generate_formula(exps[[exp]], label), family = binomial(),
## data = df_1x_sm)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.9570 -1.0252 -0.0006 0.8785 6.0819
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.770e+00 4.565e-02 60.680 < 2e-16 ***
## daily_num_sessions_started 3.293e-03 8.430e-03 0.391 0.69606
## daily_num_sessions_started_max 1.367e-02 4.816e-03 2.839 0.00453 **
## FX_PAGE_LOAD_MS_2_PARENT 3.226e-05 9.254e-06 3.486 0.00049 ***
## memory_mb 1.893e-05 1.302e-06 14.540 < 2e-16 ***
## num_active_days 9.663e-02 4.465e-03 21.640 < 2e-16 ***
## num_addons -4.972e-01 5.423e-03 -91.689 < 2e-16 ***
## num_bookmarks 8.038e-05 1.001e-05 8.032 9.63e-16 ***
## profile_age 5.429e-05 1.141e-05 4.760 1.94e-06 ***
## session_length -2.546e-02 1.685e-03 -15.110 < 2e-16 ***
## session_length_max 5.212e-03 7.604e-04 6.855 7.14e-12 ***
## TIME_TO_DOM_COMPLETE_MS 4.101e-05 1.312e-05 3.125 0.00178 **
## TIME_TO_DOM_CONTENT_LOADED_END_MS 2.991e-05 5.694e-06 5.253 1.50e-07 ***
## TIME_TO_DOM_INTERACTIVE_MS -4.012e-05 1.365e-05 -2.939 0.00330 **
## TIME_TO_LOAD_EVENT_END_MS -1.554e-04 1.410e-05 -11.017 < 2e-16 ***
## TIME_TO_NON_BLANK_PAINT_MS 3.855e-05 8.674e-06 4.445 8.80e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 97041 on 69999 degrees of freedom
## Residual deviance: 79787 on 69984 degrees of freedom
## AIC: 79819
##
## Number of Fisher Scoring iterations: 5
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## glm(formula = generate_formula(exps[[exp]], label), family = binomial(),
## data = df_1x_sm)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.4236 -0.9967 -0.0003 0.8610 6.1858
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.899e+00 6.084e-02 47.655 < 2e-16
## cpu_speed_mhz 1.628e-05 1.478e-05 1.102 0.27061
## daily_num_sessions_started 3.786e-03 8.510e-03 0.445 0.65640
## daily_num_sessions_started_max 1.200e-02 4.860e-03 2.470 0.01351
## default_search_engine_other_nonbundled 1.416e+00 3.546e-02 39.931 < 2e-16
## FX_PAGE_LOAD_MS_2_PARENT 3.056e-05 9.508e-06 3.214 0.00131
## memory_mb 2.239e-05 1.378e-06 16.248 < 2e-16
## num_active_days 9.800e-02 4.524e-03 21.662 < 2e-16
## num_addons -5.453e-01 5.735e-03 -95.092 < 2e-16
## num_bookmarks 9.586e-05 1.033e-05 9.282 < 2e-16
## profile_age 4.589e-05 1.158e-05 3.964 7.37e-05
## session_length -2.278e-02 1.668e-03 -13.658 < 2e-16
## session_length_max 4.500e-03 7.473e-04 6.021 1.73e-09
## startup_ms -1.174e-06 3.285e-07 -3.575 0.00035
## startup_ms_max 1.111e-07 5.292e-08 2.100 0.03574
## TIME_TO_DOM_COMPLETE_MS 7.612e-06 1.347e-05 0.565 0.57200
## TIME_TO_DOM_CONTENT_LOADED_END_MS 2.243e-05 5.689e-06 3.942 8.08e-05
## TIME_TO_DOM_INTERACTIVE_MS -3.523e-05 1.384e-05 -2.546 0.01088
## TIME_TO_LOAD_EVENT_END_MS -1.208e-04 1.437e-05 -8.407 < 2e-16
## TIME_TO_NON_BLANK_PAINT_MS 4.358e-05 8.766e-06 4.971 6.65e-07
##
## (Intercept) ***
## cpu_speed_mhz
## daily_num_sessions_started
## daily_num_sessions_started_max *
## default_search_engine_other_nonbundled ***
## FX_PAGE_LOAD_MS_2_PARENT **
## memory_mb ***
## num_active_days ***
## num_addons ***
## num_bookmarks ***
## profile_age ***
## session_length ***
## session_length_max ***
## startup_ms ***
## startup_ms_max *
## TIME_TO_DOM_COMPLETE_MS
## TIME_TO_DOM_CONTENT_LOADED_END_MS ***
## TIME_TO_DOM_INTERACTIVE_MS *
## TIME_TO_LOAD_EVENT_END_MS ***
## TIME_TO_NON_BLANK_PAINT_MS ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 97041 on 69999 degrees of freedom
## Residual deviance: 77957 on 69980 degrees of freedom
## AIC: 77997
##
## Number of Fisher Scoring iterations: 6
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## glm(formula = generate_formula(exps[[exp]], label), family = binomial(),
## data = df_1x_sm)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.9663 -0.7020 0.0000 0.6868 6.3793
##
## Coefficients: (4 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.181e+12 1.349e+12 0.875 0.38141
## cpu_l2_cache_kb_cat_l512 -2.018e-01 6.717e-02 -3.004 0.00267
## profile_age -8.680e-05 1.485e-05 -5.846 5.05e-09
## timezone_cat_12_14 -6.042e+01 3.355e+07 0.000 1.00000
## cpu_vendor_Intel -1.181e+12 1.349e+12 -0.875 0.38141
## default_search_engine_other_bundled -1.253e+00 2.263e-01 -5.536 3.10e-08
## timezone_cat_m4_m2 -4.613e-01 3.865e-01 -1.193 0.23275
## cpu_vendor_AMD -1.181e+12 1.349e+12 -0.875 0.38141
## default_search_engine_Bing -1.604e+00 9.460e-02 -16.955 < 2e-16
## timezone_cat_m12_m10 7.517e-01 2.690e-01 2.794 0.00521
## timezone_cat_8_10 -9.267e-01 4.540e-01 -2.041 0.04122
## cpu_l2_cache_kb_cat_l1024 -1.099e-01 4.776e-02 -2.300 0.02143
## install_year 2.155e-02 4.929e-03 4.373 1.23e-05
## is_default_browser_True 2.051e-01 2.058e-02 9.967 < 2e-16
## cpu_speed_mhz -9.222e-06 1.728e-05 -0.534 0.59354
## cpu_l2_cache_kb_cat_g1024 -1.523e-01 7.396e-02 -2.059 0.03952
## cpu_vendor_Other -1.181e+12 1.349e+12 -0.875 0.38141
## default_search_engine_DuckDuckGo -9.969e-01 6.389e-02 -15.602 < 2e-16
## cpu_l2_cache_kb_cat_l256 NA NA NA NA
## memory_mb 4.118e-06 1.589e-06 2.592 0.00953
## cpu_l2_cache_kb -3.995e-05 2.199e-05 -1.817 0.06918
## startup_ms_max 2.134e-08 5.363e-08 0.398 0.69063
## default_search_engine_Yahoo 2.017e+01 4.300e+04 0.000 0.99963
## startup_ms -5.958e-07 2.785e-07 -2.139 0.03242
## num_bookmarks 3.449e-05 1.074e-05 3.210 0.00133
## timezone_cat_m2_0 -1.220e+00 3.078e-01 -3.964 7.36e-05
## num_active_days 5.358e-02 5.174e-03 10.356 < 2e-16
## distro_id_norm_Yahoo 3.551e+01 5.409e+04 0.001 0.99948
## cpu_cores 4.787e-02 8.367e-03 5.721 1.06e-08
## default_search_engine_Google -1.426e+00 3.989e-02 -35.739 < 2e-16
## timezone_cat_m8_m6 2.148e-01 1.999e-01 1.075 0.28258
## distro_id_norm_other 5.737e+00 8.262e-01 6.944 3.81e-12
## default_search_engine_other_nonbundled NA NA NA NA
## distro_id_norm_acer 7.188e+00 7.485e-01 9.603 < 2e-16
## sync_configured_True 1.005e+00 5.105e-02 19.696 < 2e-16
## fxa_configured_True 7.782e-02 5.107e-02 1.524 0.12760
## daily_num_sessions_started 1.685e-02 9.584e-03 1.758 0.07869
## timezone_cat_6_8 -1.461e+00 3.602e-01 -4.056 4.99e-05
## timezone_cat_2_4 -1.945e+00 3.094e-01 -6.285 3.28e-10
## session_length_max 1.639e-03 8.392e-04 1.953 0.05078
## daily_num_sessions_started_max 4.961e-03 5.442e-03 0.912 0.36193
## TIME_TO_DOM_CONTENT_LOADED_END_MS -5.168e-06 6.174e-06 -0.837 0.40254
## TIME_TO_NON_BLANK_PAINT_MS 1.644e-05 9.386e-06 1.752 0.07983
## locale_enUS -2.352e+00 4.844e-02 -48.555 < 2e-16
## locale_enGB NA NA NA NA
## distro_id_norm_Mozilla NA NA NA NA
## FX_PAGE_LOAD_MS_2_PARENT 6.051e-05 1.080e-05 5.601 2.13e-08
## session_length -1.696e-02 1.840e-03 -9.219 < 2e-16
## timezone_cat_4_6 -1.726e+00 3.090e-01 -5.585 2.34e-08
## timezone_cat_0_2 -1.346e+00 2.609e-01 -5.160 2.47e-07
## TIME_TO_DOM_INTERACTIVE_MS 7.579e-06 1.536e-05 0.493 0.62175
## TIME_TO_DOM_COMPLETE_MS -3.993e-05 1.527e-05 -2.615 0.00891
## timezone_cat_m6_m4 1.737e-01 2.038e-01 0.852 0.39399
## TIME_TO_LOAD_EVENT_END_MS -2.855e-05 1.623e-05 -1.759 0.07850
## country_US 5.058e-01 7.496e-02 6.748 1.50e-11
## timezone_offset 4.778e-04 3.297e-04 1.449 0.14731
## is_wow64_True -1.890e+00 3.237e-02 -58.384 < 2e-16
## num_addons -5.948e-01 6.319e-03 -94.133 < 2e-16
##
## (Intercept)
## cpu_l2_cache_kb_cat_l512 **
## profile_age ***
## timezone_cat_12_14
## cpu_vendor_Intel
## default_search_engine_other_bundled ***
## timezone_cat_m4_m2
## cpu_vendor_AMD
## default_search_engine_Bing ***
## timezone_cat_m12_m10 **
## timezone_cat_8_10 *
## cpu_l2_cache_kb_cat_l1024 *
## install_year ***
## is_default_browser_True ***
## cpu_speed_mhz
## cpu_l2_cache_kb_cat_g1024 *
## cpu_vendor_Other
## default_search_engine_DuckDuckGo ***
## cpu_l2_cache_kb_cat_l256
## memory_mb **
## cpu_l2_cache_kb .
## startup_ms_max
## default_search_engine_Yahoo
## startup_ms *
## num_bookmarks **
## timezone_cat_m2_0 ***
## num_active_days ***
## distro_id_norm_Yahoo
## cpu_cores ***
## default_search_engine_Google ***
## timezone_cat_m8_m6
## distro_id_norm_other ***
## default_search_engine_other_nonbundled
## distro_id_norm_acer ***
## sync_configured_True ***
## fxa_configured_True
## daily_num_sessions_started .
## timezone_cat_6_8 ***
## timezone_cat_2_4 ***
## session_length_max .
## daily_num_sessions_started_max
## TIME_TO_DOM_CONTENT_LOADED_END_MS
## TIME_TO_NON_BLANK_PAINT_MS .
## locale_enUS ***
## locale_enGB
## distro_id_norm_Mozilla
## FX_PAGE_LOAD_MS_2_PARENT ***
## session_length ***
## timezone_cat_4_6 ***
## timezone_cat_0_2 ***
## TIME_TO_DOM_INTERACTIVE_MS
## TIME_TO_DOM_COMPLETE_MS **
## timezone_cat_m6_m4
## TIME_TO_LOAD_EVENT_END_MS .
## country_US ***
## timezone_offset
## is_wow64_True ***
## num_addons ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 97041 on 69999 degrees of freedom
## Residual deviance: 64451 on 69946 degrees of freedom
## AIC: 64559
##
## Number of Fisher Scoring iterations: 25
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## glm(formula = generate_formula(exps[[exp]], label), family = binomial(),
## data = df_1x_sm)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.9676 -0.7044 0.0000 0.6847 6.3750
##
## Coefficients: (5 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.721e+11 2.858e+11 1.302 0.19288
## profile_age -8.718e-05 1.484e-05 -5.873 4.28e-09
## cpu_l2_cache_kb_cat_l512 -6.678e-02 6.828e-02 -0.978 0.32801
## timezone_cat_10_12 -1.831e-01 8.277e-01 -0.221 0.82494
## timezone_cat_12_14 -2.735e+01 3.862e+05 0.000 0.99994
## cpu_vendor_Intel -3.721e+11 2.858e+11 -1.302 0.19288
## cpu_vendor_AMD -3.721e+11 2.858e+11 -1.302 0.19288
## timezone_cat_m4_m2 -6.352e-01 3.259e-01 -1.949 0.05126
## cpu_l2_cache_kb_cat_l1024 4.237e-02 5.787e-02 0.732 0.46406
## timezone_cat_m10_m8 -1.727e-01 2.232e-01 -0.774 0.43908
## timezone_cat_m12_m10 5.786e-01 2.146e-01 2.697 0.00700
## default_search_engine_other_bundled -1.253e+00 2.263e-01 -5.536 3.09e-08
## timezone_cat_8_10 -1.102e+00 4.126e-01 -2.671 0.00757
## default_search_engine_Bing -1.604e+00 9.460e-02 -16.955 < 2e-16
## cpu_vendor_Other -3.721e+11 2.858e+11 -1.302 0.19288
## install_year 2.157e-02 4.928e-03 4.377 1.20e-05
## is_default_browser_True 2.052e-01 2.058e-02 9.972 < 2e-16
## cpu_speed_mhz -9.313e-06 1.728e-05 -0.539 0.58991
## cpu_l2_cache_kb_cat_l256 1.524e-01 7.396e-02 2.061 0.03933
## cpu_l2_cache_kb_cat_g1024 NA NA NA NA
## startup_ms_max 2.120e-08 5.364e-08 0.395 0.69268
## cpu_l2_cache_kb -3.994e-05 2.199e-05 -1.817 0.06927
## memory_mb 4.117e-06 1.589e-06 2.592 0.00955
## default_search_engine_DuckDuckGo -9.968e-01 6.389e-02 -15.601 < 2e-16
## default_search_engine_Yahoo 2.019e+01 4.338e+04 0.000 0.99963
## startup_ms -5.949e-07 2.786e-07 -2.136 0.03271
## timezone_cat_m2_0 -1.395e+00 2.219e-01 -6.285 3.27e-10
## num_bookmarks 3.449e-05 1.074e-05 3.210 0.00133
## distro_id_norm_Yahoo 3.549e+01 5.354e+04 0.001 0.99947
## num_active_days 5.358e-02 5.174e-03 10.356 < 2e-16
## default_search_engine_Google -1.425e+00 3.988e-02 -35.740 < 2e-16
## timezone_cat_m8_m6 4.150e-02 6.169e-02 0.673 0.50108
## cpu_cores 4.786e-02 8.366e-03 5.721 1.06e-08
## distro_id_norm_other 5.737e+00 8.258e-01 6.948 3.71e-12
## default_search_engine_other_nonbundled NA NA NA NA
## sync_configured_True 1.005e+00 5.105e-02 19.693 < 2e-16
## distro_id_norm_acer 7.189e+00 7.486e-01 9.603 < 2e-16
## fxa_configured_True 7.794e-02 5.107e-02 1.526 0.12701
## timezone_cat_2_4 -2.119e+00 2.249e-01 -9.421 < 2e-16
## session_length_max 1.639e-03 8.392e-04 1.954 0.05075
## timezone_cat_6_8 -1.636e+00 2.988e-01 -5.475 4.37e-08
## daily_num_sessions_started 1.684e-02 9.583e-03 1.757 0.07891
## TIME_TO_DOM_CONTENT_LOADED_END_MS -5.171e-06 6.173e-06 -0.838 0.40220
## daily_num_sessions_started_max 4.966e-03 5.442e-03 0.913 0.36147
## locale_enUS -2.352e+00 4.844e-02 -48.556 < 2e-16
## locale_enGB NA NA NA NA
## TIME_TO_NON_BLANK_PAINT_MS 1.642e-05 9.387e-06 1.749 0.08022
## distro_id_norm_Mozilla NA NA NA NA
## session_length -1.697e-02 1.840e-03 -9.222 < 2e-16
## FX_PAGE_LOAD_MS_2_PARENT 6.054e-05 1.080e-05 5.604 2.09e-08
## timezone_cat_4_6 -1.901e+00 2.268e-01 -8.381 < 2e-16
## timezone_cat_0_2 -1.520e+00 1.513e-01 -10.045 < 2e-16
## TIME_TO_DOM_INTERACTIVE_MS 7.657e-06 1.536e-05 0.498 0.61815
## TIME_TO_DOM_COMPLETE_MS -3.999e-05 1.527e-05 -2.619 0.00881
## timezone_cat_m6_m4 NA NA NA NA
## TIME_TO_LOAD_EVENT_END_MS -2.853e-05 1.623e-05 -1.758 0.07877
## country_US 5.058e-01 7.517e-02 6.729 1.71e-11
## timezone_offset 4.795e-04 3.856e-04 1.243 0.21371
## is_wow64_True -1.890e+00 3.237e-02 -58.385 < 2e-16
## num_addons -5.948e-01 6.319e-03 -94.136 < 2e-16
##
## (Intercept)
## profile_age ***
## cpu_l2_cache_kb_cat_l512
## timezone_cat_10_12
## timezone_cat_12_14
## cpu_vendor_Intel
## cpu_vendor_AMD
## timezone_cat_m4_m2 .
## cpu_l2_cache_kb_cat_l1024
## timezone_cat_m10_m8
## timezone_cat_m12_m10 **
## default_search_engine_other_bundled ***
## timezone_cat_8_10 **
## default_search_engine_Bing ***
## cpu_vendor_Other
## install_year ***
## is_default_browser_True ***
## cpu_speed_mhz
## cpu_l2_cache_kb_cat_l256 *
## cpu_l2_cache_kb_cat_g1024
## startup_ms_max
## cpu_l2_cache_kb .
## memory_mb **
## default_search_engine_DuckDuckGo ***
## default_search_engine_Yahoo
## startup_ms *
## timezone_cat_m2_0 ***
## num_bookmarks **
## distro_id_norm_Yahoo
## num_active_days ***
## default_search_engine_Google ***
## timezone_cat_m8_m6
## cpu_cores ***
## distro_id_norm_other ***
## default_search_engine_other_nonbundled
## sync_configured_True ***
## distro_id_norm_acer ***
## fxa_configured_True
## timezone_cat_2_4 ***
## session_length_max .
## timezone_cat_6_8 ***
## daily_num_sessions_started .
## TIME_TO_DOM_CONTENT_LOADED_END_MS
## daily_num_sessions_started_max
## locale_enUS ***
## locale_enGB
## TIME_TO_NON_BLANK_PAINT_MS .
## distro_id_norm_Mozilla
## session_length ***
## FX_PAGE_LOAD_MS_2_PARENT ***
## timezone_cat_4_6 ***
## timezone_cat_0_2 ***
## TIME_TO_DOM_INTERACTIVE_MS
## TIME_TO_DOM_COMPLETE_MS **
## timezone_cat_m6_m4
## TIME_TO_LOAD_EVENT_END_MS .
## country_US ***
## timezone_offset
## is_wow64_True ***
## num_addons ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 97041 on 69999 degrees of freedom
## Residual deviance: 64451 on 69945 degrees of freedom
## AIC: 64561
##
## Number of Fisher Scoring iterations: 25

Statistically significant estimates are identified by low p-values (i.e., \(< 0.05\)). There is no clear consensus on whether the final model should include all of the variables, even the non-significant ones. Some authors suggest that the final model should include not only statistically significant variables, but also variables known to be associated with selection.
Once the propensity scores have been calculated, a graphical approach can be used to assess how similar the score distributions of the two groups are. This approach uses back-to-back histograms, such as those produced by `histbackback()` in the Hmisc package. Back-to-back histograms cannot be used with Mahalanobis distance, because it is a multidimensional technique that does not reduce the covariates to a single score.
# Back-to-back histograms of the fitted propensity scores, one panel per
# model in ps_1x, laid out on a 2 x 3 grid.
par(mfrow = c(2, 3))
for (ps in ps_1x) {
  # With no `newdata`, predict() returns the model's fitted probabilities.
  # The column is overwritten on every pass, so after the loop it holds the
  # scores of the last model in ps_1x.
  df_1x_sm$psvalue <- predict(ps, type = "response")

  # split() orders the groups by the sorted values of label_release, and
  # histbackback() draws the first group on the left and the second on the
  # right; `xlab` must name them in that same order.
  # NOTE(review): the glm summaries indicate label_release == 1 means
  # release (y = 0 is the over-sampled beta majority), so the left panel is
  # beta and the right panel is release. The labels were previously given
  # as c("release", "beta") -- confirm the coding of label_release.
  out <- histbackback(
    split(df_1x_sm$psvalue, df_1x_sm$label_release),
    main = "Propensity score before matching",
    xlab = c("beta", "release")
  )

  # Redraw both halves in colour on top of the histogram just plotted.
  barplot(
    -out$left,
    col = "#111d5e", horiz = TRUE, space = 0, add = TRUE, axes = FALSE
  )
  barplot(
    out$right,
    col = "#b21f66", horiz = TRUE, space = 0, add = TRUE, axes = FALSE
  )
}

# Container for the propensity models fitted on the 2x sample, keyed by
# experiment name.
ps_2x <- list()
# Fit one binomial (logit-link) GLM per experiment on the 2x sample, print
# its summary, and store the model in ps_2x under the experiment name.
# The loop variable and the inline formula expression are kept as written so
# that the `Call:` shown by summary() stays the same.
for (exp in names(exps)) {
  ps <- glm(
    formula = generate_formula(exps[[exp]], label),
    family = binomial(),
    data = df_2x_sm
  )
  print(summary(ps))
  ps_2x[[exp]] <- ps
}
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## glm(formula = generate_formula(exps[[exp]], label), family = binomial(),
## data = df_2x_sm)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.2009 -0.8073 -0.4246 0.8170 7.3992
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.244e+00 5.414e-02 59.915 < 2e-16 ***
## daily_num_sessions_started 1.693e-02 8.685e-03 1.950 0.051196 .
## daily_num_sessions_started_max 7.283e-03 4.947e-03 1.472 0.140973
## FX_PAGE_LOAD_MS_2_PARENT 1.831e-08 1.015e-05 0.002 0.998561
## fxa_configured_True 1.027e+00 2.810e-02 36.570 < 2e-16 ***
## memory_mb 1.757e-05 1.354e-06 12.983 < 2e-16 ***
## num_active_days 1.047e-01 4.857e-03 21.560 < 2e-16 ***
## num_addons -6.824e-01 7.224e-03 -94.470 < 2e-16 ***
## num_bookmarks 3.887e-05 1.058e-05 3.673 0.000240 ***
## profile_age 7.245e-05 1.235e-05 5.867 4.44e-09 ***
## session_length -1.658e-02 1.714e-03 -9.672 < 2e-16 ***
## session_length_max 2.726e-03 7.552e-04 3.609 0.000307 ***
## TIME_TO_DOM_COMPLETE_MS 1.748e-05 1.401e-05 1.247 0.212258
## TIME_TO_DOM_CONTENT_LOADED_END_MS 2.356e-06 5.923e-06 0.398 0.690733
## TIME_TO_DOM_INTERACTIVE_MS 2.191e-05 1.488e-05 1.472 0.140896
## TIME_TO_LOAD_EVENT_END_MS -1.299e-04 1.502e-05 -8.650 < 2e-16 ***
## TIME_TO_NON_BLANK_PAINT_MS 2.006e-05 9.665e-06 2.075 0.037970 *
## timezone_cat_0_2 -7.558e-01 2.375e-02 -31.828 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 89240 on 69999 degrees of freedom
## Residual deviance: 68797 on 69982 degrees of freedom
## AIC: 68833
##
## Number of Fisher Scoring iterations: 6
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## glm(formula = generate_formula(exps[[exp]], label), family = binomial(),
## data = df_2x_sm)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.7751 -0.7717 -0.3903 0.7684 7.5588
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.680e+00 7.990e-02 33.545 < 2e-16
## country_US 9.198e-01 5.923e-02 15.527 < 2e-16
## daily_num_sessions_started 1.636e-02 8.890e-03 1.840 0.065760
## daily_num_sessions_started_max 7.045e-03 5.068e-03 1.390 0.164434
## default_search_engine_other_nonbundled 1.597e+00 3.819e-02 41.807 < 2e-16
## FX_PAGE_LOAD_MS_2_PARENT -8.889e-07 1.045e-05 -0.085 0.932186
## fxa_configured_True -1.087e-02 5.228e-02 -0.208 0.835258
## memory_mb 1.866e-05 1.393e-06 13.399 < 2e-16
## num_active_days 1.016e-01 4.975e-03 20.431 < 2e-16
## num_addons -7.560e-01 7.659e-03 -98.712 < 2e-16
## num_bookmarks 4.005e-05 1.091e-05 3.670 0.000242
## profile_age 6.950e-05 1.268e-05 5.479 4.27e-08
## session_length -1.427e-02 1.754e-03 -8.133 4.20e-16
## session_length_max 1.893e-03 7.837e-04 2.415 0.015738
## startup_ms -3.574e-06 6.558e-07 -5.450 5.04e-08
## startup_ms_max 4.510e-07 9.149e-08 4.929 8.25e-07
## sync_configured_True 1.233e+00 5.217e-02 23.631 < 2e-16
## TIME_TO_DOM_COMPLETE_MS -1.301e-05 1.461e-05 -0.891 0.373170
## TIME_TO_DOM_CONTENT_LOADED_END_MS -4.544e-06 6.040e-06 -0.752 0.451809
## TIME_TO_DOM_INTERACTIVE_MS 3.300e-05 1.522e-05 2.168 0.030159
## TIME_TO_LOAD_EVENT_END_MS -9.791e-05 1.553e-05 -6.303 2.92e-10
## TIME_TO_NON_BLANK_PAINT_MS 2.380e-05 1.008e-05 2.362 0.018155
## timezone_cat_0_2 1.102e-01 5.991e-02 1.840 0.065784
##
## (Intercept) ***
## country_US ***
## daily_num_sessions_started .
## daily_num_sessions_started_max
## default_search_engine_other_nonbundled ***
## FX_PAGE_LOAD_MS_2_PARENT
## fxa_configured_True
## memory_mb ***
## num_active_days ***
## num_addons ***
## num_bookmarks ***
## profile_age ***
## session_length ***
## session_length_max *
## startup_ms ***
## startup_ms_max ***
## sync_configured_True ***
## TIME_TO_DOM_COMPLETE_MS
## TIME_TO_DOM_CONTENT_LOADED_END_MS
## TIME_TO_DOM_INTERACTIVE_MS *
## TIME_TO_LOAD_EVENT_END_MS ***
## TIME_TO_NON_BLANK_PAINT_MS *
## timezone_cat_0_2 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 89240 on 69999 degrees of freedom
## Residual deviance: 66134 on 69977 degrees of freedom
## AIC: 66180
##
## Number of Fisher Scoring iterations: 7
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## glm(formula = generate_formula(exps[[exp]], label), family = binomial(),
## data = df_2x_sm)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.1074 -0.8557 -0.4533 0.8651 7.2672
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.975e+00 5.289e-02 56.255 < 2e-16 ***
## daily_num_sessions_started 2.158e-02 8.546e-03 2.526 0.01155 *
## daily_num_sessions_started_max 8.048e-03 4.857e-03 1.657 0.09753 .
## FX_PAGE_LOAD_MS_2_PARENT 1.702e-05 9.974e-06 1.707 0.08788 .
## memory_mb 2.155e-05 1.347e-06 15.995 < 2e-16 ***
## num_active_days 1.060e-01 4.757e-03 22.286 < 2e-16 ***
## num_addons -6.548e-01 7.064e-03 -92.698 < 2e-16 ***
## num_bookmarks 7.049e-05 1.015e-05 6.947 3.72e-12 ***
## profile_age 5.534e-05 1.210e-05 4.575 4.77e-06 ***
## session_length -2.066e-02 1.670e-03 -12.371 < 2e-16 ***
## session_length_max 4.076e-03 7.192e-04 5.667 1.45e-08 ***
## TIME_TO_DOM_COMPLETE_MS 3.547e-05 1.376e-05 2.578 0.00993 **
## TIME_TO_DOM_CONTENT_LOADED_END_MS 1.613e-05 5.811e-06 2.776 0.00550 **
## TIME_TO_DOM_INTERACTIVE_MS 5.309e-06 1.467e-05 0.362 0.71744
## TIME_TO_LOAD_EVENT_END_MS -1.497e-04 1.483e-05 -10.094 < 2e-16 ***
## TIME_TO_NON_BLANK_PAINT_MS 1.495e-05 9.715e-06 1.539 0.12378
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 89240 on 69999 degrees of freedom
## Residual deviance: 71230 on 69984 degrees of freedom
## AIC: 71262
##
## Number of Fisher Scoring iterations: 6
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## glm(formula = generate_formula(exps[[exp]], label), family = binomial(),
## data = df_2x_sm)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.5414 -0.8302 -0.4244 0.8287 7.3754
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.164e+00 6.844e-02 46.226 < 2e-16
## cpu_speed_mhz 2.653e-05 1.558e-05 1.703 0.0886
## daily_num_sessions_started 1.836e-02 8.686e-03 2.114 0.0345
## daily_num_sessions_started_max 7.404e-03 4.939e-03 1.499 0.1338
## default_search_engine_other_nonbundled 1.588e+00 3.717e-02 42.738 < 2e-16
## FX_PAGE_LOAD_MS_2_PARENT 1.727e-05 1.026e-05 1.683 0.0924
## memory_mb 2.373e-05 1.418e-06 16.738 < 2e-16
## num_active_days 1.070e-01 4.838e-03 22.122 < 2e-16
## num_addons -7.158e-01 7.438e-03 -96.234 < 2e-16
## num_bookmarks 8.225e-05 1.035e-05 7.949 1.87e-15
## profile_age 5.128e-05 1.238e-05 4.141 3.46e-05
## session_length -1.786e-02 1.672e-03 -10.686 < 2e-16
## session_length_max 3.351e-03 7.210e-04 4.648 3.35e-06
## startup_ms -4.479e-06 6.831e-07 -6.556 5.52e-11
## startup_ms_max 5.681e-07 9.319e-08 6.096 1.09e-09
## TIME_TO_DOM_COMPLETE_MS 7.213e-06 1.433e-05 0.503 0.6147
## TIME_TO_DOM_CONTENT_LOADED_END_MS 1.043e-05 5.884e-06 1.772 0.0763
## TIME_TO_DOM_INTERACTIVE_MS 1.415e-05 1.491e-05 0.949 0.3427
## TIME_TO_LOAD_EVENT_END_MS -1.218e-04 1.532e-05 -7.945 1.94e-15
## TIME_TO_NON_BLANK_PAINT_MS 2.219e-05 9.911e-06 2.239 0.0251
##
## (Intercept) ***
## cpu_speed_mhz .
## daily_num_sessions_started *
## daily_num_sessions_started_max
## default_search_engine_other_nonbundled ***
## FX_PAGE_LOAD_MS_2_PARENT .
## memory_mb ***
## num_active_days ***
## num_addons ***
## num_bookmarks ***
## profile_age ***
## session_length ***
## session_length_max ***
## startup_ms ***
## startup_ms_max ***
## TIME_TO_DOM_COMPLETE_MS
## TIME_TO_DOM_CONTENT_LOADED_END_MS .
## TIME_TO_DOM_INTERACTIVE_MS
## TIME_TO_LOAD_EVENT_END_MS ***
## TIME_TO_NON_BLANK_PAINT_MS *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 89240 on 69999 degrees of freedom
## Residual deviance: 69251 on 69980 degrees of freedom
## AIC: 69291
##
## Number of Fisher Scoring iterations: 7
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## glm(formula = generate_formula(exps[[exp]], label), family = binomial(),
## data = df_2x_sm)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.8084 -0.6304 -0.2619 0.6414 7.3631
##
## Coefficients: (5 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.647e+01 1.053e+01 -5.363 8.16e-08
## cpu_l2_cache_kb_cat_l512 -1.353e-01 6.814e-02 -1.985 0.047122
## profile_age -9.892e-05 1.581e-05 -6.256 3.94e-10
## timezone_cat_12_14 -1.123e+01 1.898e+02 -0.059 0.952799
## cpu_vendor_Intel 3.055e+00 7.425e-01 4.115 3.88e-05
## default_search_engine_other_bundled -1.345e+00 2.237e-01 -6.012 1.83e-09
## timezone_cat_m4_m2 -3.384e-01 3.983e-01 -0.850 0.395565
## cpu_vendor_AMD 3.164e+00 7.438e-01 4.254 2.10e-05
## default_search_engine_Bing -1.713e+00 1.006e-01 -17.018 < 2e-16
## timezone_cat_m12_m10 3.504e-01 2.725e-01 1.286 0.198575
## timezone_cat_8_10 -1.096e+00 4.956e-01 -2.212 0.026951
## cpu_l2_cache_kb_cat_l1024 -1.257e-01 5.136e-02 -2.447 0.014398
## install_year 3.005e-02 5.203e-03 5.775 7.71e-09
## is_default_browser_True 2.232e-01 2.175e-02 10.259 < 2e-16
## cpu_speed_mhz 4.009e-06 1.821e-05 0.220 0.825728
## cpu_l2_cache_kb_cat_g1024 -1.461e-01 8.005e-02 -1.825 0.068048
## cpu_vendor_Other NA NA NA NA
## default_search_engine_DuckDuckGo -1.167e+00 6.906e-02 -16.902 < 2e-16
## cpu_l2_cache_kb_cat_l256 NA NA NA NA
## memory_mb 5.832e-06 1.620e-06 3.600 0.000318
## cpu_l2_cache_kb -4.188e-05 2.401e-05 -1.744 0.081109
## startup_ms_max 3.431e-07 9.270e-08 3.701 0.000215
## default_search_engine_Yahoo 8.150e+00 6.138e+01 0.133 0.894364
## startup_ms -2.800e-06 6.425e-07 -4.358 1.31e-05
## num_bookmarks 1.566e-05 1.143e-05 1.371 0.170486
## timezone_cat_m2_0 -1.330e+00 3.345e-01 -3.977 6.99e-05
## num_active_days 6.318e-02 5.487e-03 11.516 < 2e-16
## distro_id_norm_Yahoo 7.901e+00 1.384e+00 5.709 1.14e-08
## cpu_cores 5.812e-02 8.584e-03 6.771 1.28e-11
## default_search_engine_Google -1.615e+00 4.170e-02 -38.726 < 2e-16
## timezone_cat_m8_m6 6.566e-04 1.986e-01 0.003 0.997362
## distro_id_norm_other 6.544e+00 6.837e-01 9.571 < 2e-16
## default_search_engine_other_nonbundled NA NA NA NA
## distro_id_norm_acer 7.925e+00 8.521e-01 9.300 < 2e-16
## sync_configured_True 1.119e+00 5.564e-02 20.115 < 2e-16
## fxa_configured_True -1.734e-02 5.584e-02 -0.311 0.756177
## daily_num_sessions_started 3.046e-02 9.768e-03 3.118 0.001820
## timezone_cat_6_8 -1.832e+00 3.996e-01 -4.585 4.54e-06
## timezone_cat_2_4 -2.029e+00 3.365e-01 -6.030 1.64e-09
## session_length_max 6.315e-04 9.178e-04 0.688 0.491426
## daily_num_sessions_started_max 1.961e-03 5.550e-03 0.353 0.723821
## TIME_TO_DOM_CONTENT_LOADED_END_MS -1.147e-05 6.502e-06 -1.764 0.077669
## TIME_TO_NON_BLANK_PAINT_MS 2.081e-06 1.111e-05 0.187 0.851477
## locale_enUS -2.284e+00 5.108e-02 -44.713 < 2e-16
## locale_enGB NA NA NA NA
## distro_id_norm_Mozilla NA NA NA NA
## FX_PAGE_LOAD_MS_2_PARENT 4.636e-05 1.174e-05 3.950 7.80e-05
## session_length -1.215e-02 1.965e-03 -6.181 6.39e-10
## timezone_cat_4_6 -1.933e+00 3.368e-01 -5.740 9.49e-09
## timezone_cat_0_2 -1.301e+00 2.796e-01 -4.655 3.23e-06
## TIME_TO_DOM_INTERACTIVE_MS 4.882e-05 1.646e-05 2.966 0.003019
## TIME_TO_DOM_COMPLETE_MS -3.641e-05 1.662e-05 -2.190 0.028489
## timezone_cat_m6_m4 -1.626e-02 2.063e-01 -0.079 0.937205
## TIME_TO_LOAD_EVENT_END_MS -3.230e-05 1.773e-05 -1.822 0.068400
## country_US 6.626e-01 8.620e-02 7.686 1.51e-14
## timezone_offset 4.175e-04 3.622e-04 1.153 0.249049
## is_wow64_True -1.949e+00 3.758e-02 -51.860 < 2e-16
## num_addons -7.451e-01 7.896e-03 -94.372 < 2e-16
##
## (Intercept) ***
## cpu_l2_cache_kb_cat_l512 *
## profile_age ***
## timezone_cat_12_14
## cpu_vendor_Intel ***
## default_search_engine_other_bundled ***
## timezone_cat_m4_m2
## cpu_vendor_AMD ***
## default_search_engine_Bing ***
## timezone_cat_m12_m10
## timezone_cat_8_10 *
## cpu_l2_cache_kb_cat_l1024 *
## install_year ***
## is_default_browser_True ***
## cpu_speed_mhz
## cpu_l2_cache_kb_cat_g1024 .
## cpu_vendor_Other
## default_search_engine_DuckDuckGo ***
## cpu_l2_cache_kb_cat_l256
## memory_mb ***
## cpu_l2_cache_kb .
## startup_ms_max ***
## default_search_engine_Yahoo
## startup_ms ***
## num_bookmarks
## timezone_cat_m2_0 ***
## num_active_days ***
## distro_id_norm_Yahoo ***
## cpu_cores ***
## default_search_engine_Google ***
## timezone_cat_m8_m6
## distro_id_norm_other ***
## default_search_engine_other_nonbundled
## distro_id_norm_acer ***
## sync_configured_True ***
## fxa_configured_True
## daily_num_sessions_started **
## timezone_cat_6_8 ***
## timezone_cat_2_4 ***
## session_length_max
## daily_num_sessions_started_max
## TIME_TO_DOM_CONTENT_LOADED_END_MS .
## TIME_TO_NON_BLANK_PAINT_MS
## locale_enUS ***
## locale_enGB
## distro_id_norm_Mozilla
## FX_PAGE_LOAD_MS_2_PARENT ***
## session_length ***
## timezone_cat_4_6 ***
## timezone_cat_0_2 ***
## TIME_TO_DOM_INTERACTIVE_MS **
## TIME_TO_DOM_COMPLETE_MS *
## timezone_cat_m6_m4
## TIME_TO_LOAD_EVENT_END_MS .
## country_US ***
## timezone_offset
## is_wow64_True ***
## num_addons ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 89240 on 69999 degrees of freedom
## Residual deviance: 57505 on 69947 degrees of freedom
## AIC: 57611
##
## Number of Fisher Scoring iterations: 12
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## glm(formula = generate_formula(exps[[exp]], label), family = binomial(),
## data = df_2x_sm)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.8101 -0.6304 -0.2618 0.6414 7.3630
##
## Coefficients: (6 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.648e+01 1.053e+01 -5.367 8.03e-08
## profile_age -9.915e-05 1.581e-05 -6.270 3.60e-10
## cpu_l2_cache_kb_cat_l512 1.105e-02 7.304e-02 0.151 0.879710
## timezone_cat_10_12 -1.673e+00 1.154e+00 -1.450 0.147126
## timezone_cat_12_14 -1.153e+01 1.899e+02 -0.061 0.951577
## cpu_vendor_Intel 3.053e+00 7.426e-01 4.111 3.94e-05
## cpu_vendor_AMD 3.161e+00 7.438e-01 4.250 2.14e-05
## timezone_cat_m4_m2 -3.488e-01 3.363e-01 -1.037 0.299584
## cpu_l2_cache_kb_cat_l1024 2.022e-02 6.213e-02 0.326 0.744790
## timezone_cat_m10_m8 1.393e-01 2.215e-01 0.629 0.529597
## timezone_cat_m12_m10 4.681e-01 2.269e-01 2.063 0.039156
## default_search_engine_other_bundled -1.346e+00 2.237e-01 -6.017 1.78e-09
## timezone_cat_8_10 -1.329e+00 4.406e-01 -3.016 0.002559
## default_search_engine_Bing -1.713e+00 1.006e-01 -17.022 < 2e-16
## cpu_vendor_Other NA NA NA NA
## install_year 3.002e-02 5.203e-03 5.768 8.00e-09
## is_default_browser_True 2.230e-01 2.175e-02 10.252 < 2e-16
## cpu_speed_mhz 4.374e-06 1.821e-05 0.240 0.810109
## cpu_l2_cache_kb_cat_l256 1.459e-01 8.005e-02 1.822 0.068414
## cpu_l2_cache_kb_cat_g1024 NA NA NA NA
## startup_ms_max 3.378e-07 9.311e-08 3.628 0.000285
## cpu_l2_cache_kb -4.199e-05 2.401e-05 -1.749 0.080300
## memory_mb 5.848e-06 1.621e-06 3.608 0.000309
## default_search_engine_DuckDuckGo -1.167e+00 6.907e-02 -16.901 < 2e-16
## default_search_engine_Yahoo 8.161e+00 6.136e+01 0.133 0.894199
## startup_ms -2.761e-06 6.454e-07 -4.278 1.89e-05
## timezone_cat_m2_0 -1.393e+00 2.478e-01 -5.622 1.89e-08
## num_bookmarks 1.561e-05 1.143e-05 1.366 0.171967
## distro_id_norm_Yahoo 7.904e+00 1.387e+00 5.699 1.20e-08
## num_active_days 6.310e-02 5.487e-03 11.501 < 2e-16
## default_search_engine_Google -1.615e+00 4.171e-02 -38.723 < 2e-16
## timezone_cat_m8_m6 5.957e-02 6.556e-02 0.909 0.363526
## cpu_cores 5.808e-02 8.585e-03 6.765 1.34e-11
## distro_id_norm_other 6.544e+00 6.839e-01 9.570 < 2e-16
## default_search_engine_other_nonbundled NA NA NA NA
## sync_configured_True 1.119e+00 5.564e-02 20.120 < 2e-16
## distro_id_norm_acer 7.927e+00 8.528e-01 9.295 < 2e-16
## fxa_configured_True -1.775e-02 5.584e-02 -0.318 0.750548
## timezone_cat_2_4 -2.146e+00 2.449e-01 -8.762 < 2e-16
## session_length_max 6.309e-04 9.176e-04 0.687 0.491771
## timezone_cat_6_8 -2.026e+00 3.260e-01 -6.216 5.11e-10
## daily_num_sessions_started 3.044e-02 9.768e-03 3.117 0.001829
## TIME_TO_DOM_CONTENT_LOADED_END_MS -1.143e-05 6.502e-06 -1.758 0.078701
## daily_num_sessions_started_max 2.008e-03 5.550e-03 0.362 0.717585
## locale_enUS -2.285e+00 5.108e-02 -44.726 < 2e-16
## locale_enGB NA NA NA NA
## TIME_TO_NON_BLANK_PAINT_MS 1.878e-06 1.112e-05 0.169 0.865827
## distro_id_norm_Mozilla NA NA NA NA
## session_length -1.215e-02 1.965e-03 -6.182 6.34e-10
## FX_PAGE_LOAD_MS_2_PARENT 4.627e-05 1.174e-05 3.942 8.07e-05
## timezone_cat_4_6 -2.080e+00 2.437e-01 -8.538 < 2e-16
## timezone_cat_0_2 -1.387e+00 1.638e-01 -8.469 < 2e-16
## TIME_TO_DOM_INTERACTIVE_MS 4.901e-05 1.646e-05 2.977 0.002909
## TIME_TO_DOM_COMPLETE_MS -3.642e-05 1.662e-05 -2.191 0.028429
## timezone_cat_m6_m4 NA NA NA NA
## TIME_TO_LOAD_EVENT_END_MS -3.220e-05 1.773e-05 -1.816 0.069300
## country_US 6.547e-01 8.637e-02 7.581 3.44e-14
## timezone_offset 7.084e-04 4.094e-04 1.730 0.083557
## is_wow64_True -1.949e+00 3.758e-02 -51.858 < 2e-16
## num_addons -7.451e-01 7.896e-03 -94.370 < 2e-16
##
## (Intercept) ***
## profile_age ***
## cpu_l2_cache_kb_cat_l512
## timezone_cat_10_12
## timezone_cat_12_14
## cpu_vendor_Intel ***
## cpu_vendor_AMD ***
## timezone_cat_m4_m2
## cpu_l2_cache_kb_cat_l1024
## timezone_cat_m10_m8
## timezone_cat_m12_m10 *
## default_search_engine_other_bundled ***
## timezone_cat_8_10 **
## default_search_engine_Bing ***
## cpu_vendor_Other
## install_year ***
## is_default_browser_True ***
## cpu_speed_mhz
## cpu_l2_cache_kb_cat_l256 .
## cpu_l2_cache_kb_cat_g1024
## startup_ms_max ***
## cpu_l2_cache_kb .
## memory_mb ***
## default_search_engine_DuckDuckGo ***
## default_search_engine_Yahoo
## startup_ms ***
## timezone_cat_m2_0 ***
## num_bookmarks
## distro_id_norm_Yahoo ***
## num_active_days ***
## default_search_engine_Google ***
## timezone_cat_m8_m6
## cpu_cores ***
## distro_id_norm_other ***
## default_search_engine_other_nonbundled
## sync_configured_True ***
## distro_id_norm_acer ***
## fxa_configured_True
## timezone_cat_2_4 ***
## session_length_max
## timezone_cat_6_8 ***
## daily_num_sessions_started **
## TIME_TO_DOM_CONTENT_LOADED_END_MS .
## daily_num_sessions_started_max
## locale_enUS ***
## locale_enGB
## TIME_TO_NON_BLANK_PAINT_MS
## distro_id_norm_Mozilla
## session_length ***
## FX_PAGE_LOAD_MS_2_PARENT ***
## timezone_cat_4_6 ***
## timezone_cat_0_2 ***
## TIME_TO_DOM_INTERACTIVE_MS **
## TIME_TO_DOM_COMPLETE_MS *
## timezone_cat_m6_m4
## TIME_TO_LOAD_EVENT_END_MS .
## country_US ***
## timezone_offset .
## is_wow64_True ***
## num_addons ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 89240 on 69999 degrees of freedom
## Residual deviance: 57502 on 69946 degrees of freedom
## AIC: 57610
##
## Number of Fisher Scoring iterations: 12

# Back-to-back histograms of the fitted propensity scores, one panel per
# model in ps_2x, laid out on a 2 x 3 grid.
par(mfrow = c(2, 3))
for (ps in ps_2x) {
  # With no `newdata`, predict() returns the model's fitted probabilities.
  # The column is overwritten on every pass, so after the loop it holds the
  # scores of the last model in ps_2x.
  df_2x_sm$psvalue <- predict(ps, type = "response")

  # split() orders the groups by the sorted values of label_release, and
  # histbackback() draws the first group on the left and the second on the
  # right; `xlab` must name them in that same order.
  # NOTE(review): the glm summaries indicate label_release == 1 means
  # release (y = 0 is the over-sampled beta majority), so the left panel is
  # beta and the right panel is release. The labels were previously given
  # as c("release", "beta") -- confirm the coding of label_release.
  out <- histbackback(
    split(df_2x_sm$psvalue, df_2x_sm$label_release),
    main = "Propensity score before matching",
    xlab = c("beta", "release")
  )

  # Redraw both halves in colour on top of the histogram just plotted.
  barplot(
    -out$left,
    col = "#111d5e", horiz = TRUE, space = 0, add = TRUE, axes = FALSE
  )
  barplot(
    out$right,
    col = "#b21f66", horiz = TRUE, space = 0, add = TRUE, axes = FALSE
  )
}

# Container for the propensity models fitted on the 4x sample, keyed by
# experiment name.
ps_4x <- list()
# Fit one binomial (logit-link) GLM per experiment on the 4x sample, print
# its summary, and store the model in ps_4x under the experiment name.
# The loop variable and the inline formula expression are kept as written so
# that the `Call:` shown by summary() stays the same.
for (exp in names(exps)) {
  ps <- glm(
    formula = generate_formula(exps[[exp]], label),
    family = binomial(),
    data = df_4x_sm
  )
  print(summary(ps))
  ps_4x[[exp]] <- ps
}
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## glm(formula = generate_formula(exps[[exp]], label), family = binomial(),
## data = df_4x_sm)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.2993 -0.6366 -0.3792 -0.0494 7.7237
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.494e+00 6.763e-02 51.668 < 2e-16 ***
## daily_num_sessions_started 1.486e-02 9.681e-03 1.535 0.1248
## daily_num_sessions_started_max 8.353e-03 5.510e-03 1.516 0.1295
## FX_PAGE_LOAD_MS_2_PARENT -1.444e-06 1.182e-05 -0.122 0.9028
## fxa_configured_True 9.961e-01 3.195e-02 31.175 < 2e-16 ***
## memory_mb 1.640e-05 1.445e-06 11.351 < 2e-16 ***
## num_active_days 1.085e-01 5.668e-03 19.149 < 2e-16 ***
## num_addons -8.342e-01 9.677e-03 -86.199 < 2e-16 ***
## num_bookmarks 5.029e-05 1.162e-05 4.326 1.52e-05 ***
## profile_age 9.676e-05 1.422e-05 6.806 1.01e-11 ***
## session_length -1.753e-02 2.038e-03 -8.600 < 2e-16 ***
## session_length_max 2.623e-03 8.868e-04 2.958 0.0031 **
## TIME_TO_DOM_COMPLETE_MS -1.396e-05 1.752e-05 -0.797 0.4253
## TIME_TO_DOM_CONTENT_LOADED_END_MS 3.460e-06 6.600e-06 0.524 0.6001
## TIME_TO_DOM_INTERACTIVE_MS 3.890e-05 1.705e-05 2.282 0.0225 *
## TIME_TO_LOAD_EVENT_END_MS -1.076e-04 1.842e-05 -5.843 5.14e-09 ***
## TIME_TO_NON_BLANK_PAINT_MS 1.671e-05 1.066e-05 1.567 0.1171
## timezone_cat_0_2 -7.706e-01 2.850e-02 -27.039 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 70106 on 69999 degrees of freedom
## Residual deviance: 53279 on 69982 degrees of freedom
## AIC: 53315
##
## Number of Fisher Scoring iterations: 6
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## glm(formula = generate_formula(exps[[exp]], label), family = binomial(),
## data = df_4x_sm)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.8186 -0.6127 -0.3550 -0.0401 8.4904
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.983e+00 9.652e-02 30.908 < 2e-16
## country_US 8.594e-01 6.969e-02 12.332 < 2e-16
## daily_num_sessions_started 1.534e-02 9.904e-03 1.549 0.121347
## daily_num_sessions_started_max 7.950e-03 5.639e-03 1.410 0.158634
## default_search_engine_other_nonbundled 1.551e+00 4.298e-02 36.080 < 2e-16
## FX_PAGE_LOAD_MS_2_PARENT -1.019e-06 1.215e-05 -0.084 0.933114
## fxa_configured_True -7.782e-02 6.292e-02 -1.237 0.216147
## memory_mb 1.682e-05 1.483e-06 11.338 < 2e-16
## num_active_days 1.051e-01 5.784e-03 18.168 < 2e-16
## num_addons -9.043e-01 1.010e-02 -89.566 < 2e-16
## num_bookmarks 5.491e-05 1.196e-05 4.591 4.41e-06
## profile_age 9.367e-05 1.458e-05 6.426 1.31e-10
## session_length -1.482e-02 2.093e-03 -7.078 1.46e-12
## session_length_max 1.721e-03 9.365e-04 1.838 0.066120
## startup_ms -3.655e-06 7.790e-07 -4.691 2.72e-06
## startup_ms_max 4.808e-07 1.065e-07 4.514 6.36e-06
## sync_configured_True 1.244e+00 6.241e-02 19.927 < 2e-16
## TIME_TO_DOM_COMPLETE_MS -4.448e-05 1.842e-05 -2.415 0.015722
## TIME_TO_DOM_CONTENT_LOADED_END_MS -4.129e-06 6.770e-06 -0.610 0.541868
## TIME_TO_DOM_INTERACTIVE_MS 4.335e-05 1.745e-05 2.485 0.012951
## TIME_TO_LOAD_EVENT_END_MS -7.258e-05 1.925e-05 -3.771 0.000163
## TIME_TO_NON_BLANK_PAINT_MS 2.061e-05 1.109e-05 1.857 0.063245
## timezone_cat_0_2 3.119e-02 7.055e-02 0.442 0.658416
##
## (Intercept) ***
## country_US ***
## daily_num_sessions_started
## daily_num_sessions_started_max
## default_search_engine_other_nonbundled ***
## FX_PAGE_LOAD_MS_2_PARENT
## fxa_configured_True
## memory_mb ***
## num_active_days ***
## num_addons ***
## num_bookmarks ***
## profile_age ***
## session_length ***
## session_length_max .
## startup_ms ***
## startup_ms_max ***
## sync_configured_True ***
## TIME_TO_DOM_COMPLETE_MS *
## TIME_TO_DOM_CONTENT_LOADED_END_MS
## TIME_TO_DOM_INTERACTIVE_MS *
## TIME_TO_LOAD_EVENT_END_MS ***
## TIME_TO_NON_BLANK_PAINT_MS .
## timezone_cat_0_2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 70106 on 69999 degrees of freedom
## Residual deviance: 51459 on 69977 degrees of freedom
## AIC: 51505
##
## Number of Fisher Scoring iterations: 8
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## glm(formula = generate_formula(exps[[exp]], label), family = binomial(),
## data = df_4x_sm)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.1237 -0.6736 -0.4026 -0.0506 8.4904
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.267e+00 6.668e-02 49.003 < 2e-16 ***
## daily_num_sessions_started 1.813e-02 9.524e-03 1.904 0.0569 .
## daily_num_sessions_started_max 9.564e-03 5.424e-03 1.763 0.0779 .
## FX_PAGE_LOAD_MS_2_PARENT 1.447e-05 1.162e-05 1.245 0.2130
## memory_mb 2.027e-05 1.434e-06 14.136 < 2e-16 ***
## num_active_days 1.108e-01 5.571e-03 19.886 < 2e-16 ***
## num_addons -8.153e-01 9.572e-03 -85.177 < 2e-16 ***
## num_bookmarks 8.043e-05 1.094e-05 7.351 1.97e-13 ***
## profile_age 8.302e-05 1.399e-05 5.933 2.97e-09 ***
## session_length -2.127e-02 1.969e-03 -10.802 < 2e-16 ***
## session_length_max 3.724e-03 8.283e-04 4.496 6.91e-06 ***
## TIME_TO_DOM_COMPLETE_MS 1.061e-05 1.660e-05 0.639 0.5227
## TIME_TO_DOM_CONTENT_LOADED_END_MS 1.580e-05 6.448e-06 2.451 0.0143 *
## TIME_TO_DOM_INTERACTIVE_MS 2.152e-05 1.684e-05 1.279 0.2011
## TIME_TO_LOAD_EVENT_END_MS -1.331e-04 1.759e-05 -7.567 3.82e-14 ***
## TIME_TO_NON_BLANK_PAINT_MS 1.347e-05 1.075e-05 1.253 0.2103
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 70106 on 69999 degrees of freedom
## Residual deviance: 55010 on 69984 degrees of freedom
## AIC: 55042
##
## Number of Fisher Scoring iterations: 7
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## glm(formula = generate_formula(exps[[exp]], label), family = binomial(),
## data = df_4x_sm)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.6416 -0.6559 -0.3845 -0.0415 8.4904
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.460e+00 8.343e-02 41.469 < 2e-16
## cpu_speed_mhz 2.440e-05 1.785e-05 1.367 0.171536
## daily_num_sessions_started 1.657e-02 9.676e-03 1.712 0.086836
## daily_num_sessions_started_max 8.506e-03 5.511e-03 1.543 0.122766
## default_search_engine_other_nonbundled 1.554e+00 4.197e-02 37.015 < 2e-16
## FX_PAGE_LOAD_MS_2_PARENT 1.677e-05 1.192e-05 1.407 0.159350
## memory_mb 2.169e-05 1.495e-06 14.510 < 2e-16
## num_active_days 1.112e-01 5.649e-03 19.685 < 2e-16
## num_addons -8.744e-01 9.931e-03 -88.046 < 2e-16
## num_bookmarks 8.969e-05 1.113e-05 8.061 7.54e-16
## profile_age 8.055e-05 1.430e-05 5.631 1.79e-08
## session_length -1.864e-02 1.981e-03 -9.407 < 2e-16
## session_length_max 3.037e-03 8.416e-04 3.608 0.000308
## startup_ms -4.361e-06 8.230e-07 -5.298 1.17e-07
## startup_ms_max 5.716e-07 1.104e-07 5.177 2.25e-07
## TIME_TO_DOM_COMPLETE_MS -1.587e-05 1.714e-05 -0.925 0.354727
## TIME_TO_DOM_CONTENT_LOADED_END_MS 1.082e-05 6.540e-06 1.654 0.098141
## TIME_TO_DOM_INTERACTIVE_MS 2.576e-05 1.709e-05 1.507 0.131691
## TIME_TO_LOAD_EVENT_END_MS -1.052e-04 1.809e-05 -5.814 6.10e-09
## TIME_TO_NON_BLANK_PAINT_MS 1.803e-05 1.098e-05 1.641 0.100705
##
## (Intercept) ***
## cpu_speed_mhz
## daily_num_sessions_started .
## daily_num_sessions_started_max
## default_search_engine_other_nonbundled ***
## FX_PAGE_LOAD_MS_2_PARENT
## memory_mb ***
## num_active_days ***
## num_addons ***
## num_bookmarks ***
## profile_age ***
## session_length ***
## session_length_max ***
## startup_ms ***
## startup_ms_max ***
## TIME_TO_DOM_COMPLETE_MS
## TIME_TO_DOM_CONTENT_LOADED_END_MS .
## TIME_TO_DOM_INTERACTIVE_MS
## TIME_TO_LOAD_EVENT_END_MS ***
## TIME_TO_NON_BLANK_PAINT_MS
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 70106 on 69999 degrees of freedom
## Residual deviance: 53654 on 69980 degrees of freedom
## AIC: 53694
##
## Number of Fisher Scoring iterations: 8
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## glm(formula = generate_formula(exps[[exp]], label), family = binomial(),
## data = df_4x_sm)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.1270 -0.5250 -0.2521 -0.0357 8.4904
##
## Coefficients: (5 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.196e+01 1.206e+01 -5.140 2.75e-07
## cpu_l2_cache_kb_cat_l512 -1.703e-01 7.911e-02 -2.153 0.031344
## profile_age -8.118e-05 1.807e-05 -4.493 7.01e-06
## timezone_cat_12_14 -1.040e+01 1.739e+02 -0.060 0.952331
## cpu_vendor_Intel 3.220e+00 1.024e+00 3.146 0.001657
## default_search_engine_other_bundled -1.659e+00 2.701e-01 -6.141 8.18e-10
## timezone_cat_m4_m2 -9.139e-01 5.273e-01 -1.733 0.083057
## cpu_vendor_AMD 3.327e+00 1.025e+00 3.247 0.001167
## default_search_engine_Bing -1.429e+00 1.112e-01 -12.848 < 2e-16
## timezone_cat_m12_m10 6.489e-01 3.034e-01 2.139 0.032467
## timezone_cat_8_10 -2.040e+00 6.970e-01 -2.926 0.003431
## cpu_l2_cache_kb_cat_l1024 -1.879e-01 6.049e-02 -3.106 0.001896
## install_year 3.277e-02 5.950e-03 5.507 3.66e-08
## is_default_browser_True 2.277e-01 2.499e-02 9.111 < 2e-16
## cpu_speed_mhz -1.850e-05 2.130e-05 -0.868 0.385206
## cpu_l2_cache_kb_cat_g1024 -1.589e-01 9.359e-02 -1.698 0.089500
## cpu_vendor_Other NA NA NA NA
## default_search_engine_DuckDuckGo -1.013e+00 7.933e-02 -12.768 < 2e-16
## cpu_l2_cache_kb_cat_l256 NA NA NA NA
## memory_mb 4.125e-06 1.869e-06 2.207 0.027296
## cpu_l2_cache_kb -5.277e-05 2.805e-05 -1.881 0.059994
## startup_ms_max 3.314e-07 9.443e-08 3.509 0.000449
## default_search_engine_Yahoo 9.325e+00 9.599e+01 0.097 0.922608
## startup_ms -2.493e-06 6.625e-07 -3.763 0.000168
## num_bookmarks 2.935e-05 1.240e-05 2.368 0.017883
## timezone_cat_m2_0 -1.061e+00 3.773e-01 -2.811 0.004938
## num_active_days 7.509e-02 6.310e-03 11.901 < 2e-16
## distro_id_norm_Yahoo 9.923e+00 1.264e+00 7.851 4.13e-15
## cpu_cores 5.909e-02 9.616e-03 6.145 8.00e-10
## default_search_engine_Google -1.580e+00 4.689e-02 -33.682 < 2e-16
## timezone_cat_m8_m6 2.041e-01 2.230e-01 0.915 0.360134
## distro_id_norm_other 5.195e+00 5.364e-01 9.685 < 2e-16
## default_search_engine_other_nonbundled NA NA NA NA
## distro_id_norm_acer 7.408e+00 6.499e-01 11.400 < 2e-16
## sync_configured_True 1.113e+00 6.597e-02 16.866 < 2e-16
## fxa_configured_True -6.689e-02 6.647e-02 -1.006 0.314283
## daily_num_sessions_started 2.746e-02 1.086e-02 2.530 0.011408
## timezone_cat_6_8 -1.386e+00 4.429e-01 -3.129 0.001755
## timezone_cat_2_4 -1.838e+00 3.836e-01 -4.792 1.65e-06
## session_length_max 6.120e-04 1.079e-03 0.567 0.570751
## daily_num_sessions_started_max 3.313e-03 6.190e-03 0.535 0.592479
## TIME_TO_DOM_CONTENT_LOADED_END_MS -1.233e-05 7.330e-06 -1.683 0.092453
## TIME_TO_NON_BLANK_PAINT_MS -1.319e-06 1.249e-05 -0.106 0.915896
## locale_enUS -2.307e+00 5.981e-02 -38.571 < 2e-16
## locale_enGB NA NA NA NA
## distro_id_norm_Mozilla NA NA NA NA
## FX_PAGE_LOAD_MS_2_PARENT 4.987e-05 1.351e-05 3.692 0.000222
## session_length -1.308e-02 2.313e-03 -5.654 1.57e-08
## timezone_cat_4_6 -1.801e+00 3.765e-01 -4.784 1.72e-06
## timezone_cat_0_2 -1.255e+00 3.130e-01 -4.009 6.09e-05
## TIME_TO_DOM_INTERACTIVE_MS 5.844e-05 1.898e-05 3.079 0.002074
## TIME_TO_DOM_COMPLETE_MS -7.344e-05 2.101e-05 -3.495 0.000475
## timezone_cat_m6_m4 1.613e-01 2.304e-01 0.700 0.483670
## TIME_TO_LOAD_EVENT_END_MS -5.510e-06 2.210e-05 -0.249 0.803098
## country_US 5.653e-01 1.061e-01 5.326 1.00e-07
## timezone_offset 3.757e-04 4.036e-04 0.931 0.351912
## is_wow64_True -1.927e+00 4.494e-02 -42.883 < 2e-16
## num_addons -8.906e-01 1.032e-02 -86.287 < 2e-16
##
## (Intercept) ***
## cpu_l2_cache_kb_cat_l512 *
## profile_age ***
## timezone_cat_12_14
## cpu_vendor_Intel **
## default_search_engine_other_bundled ***
## timezone_cat_m4_m2 .
## cpu_vendor_AMD **
## default_search_engine_Bing ***
## timezone_cat_m12_m10 *
## timezone_cat_8_10 **
## cpu_l2_cache_kb_cat_l1024 **
## install_year ***
## is_default_browser_True ***
## cpu_speed_mhz
## cpu_l2_cache_kb_cat_g1024 .
## cpu_vendor_Other
## default_search_engine_DuckDuckGo ***
## cpu_l2_cache_kb_cat_l256
## memory_mb *
## cpu_l2_cache_kb .
## startup_ms_max ***
## default_search_engine_Yahoo
## startup_ms ***
## num_bookmarks *
## timezone_cat_m2_0 **
## num_active_days ***
## distro_id_norm_Yahoo ***
## cpu_cores ***
## default_search_engine_Google ***
## timezone_cat_m8_m6
## distro_id_norm_other ***
## default_search_engine_other_nonbundled
## distro_id_norm_acer ***
## sync_configured_True ***
## fxa_configured_True
## daily_num_sessions_started *
## timezone_cat_6_8 **
## timezone_cat_2_4 ***
## session_length_max
## daily_num_sessions_started_max
## TIME_TO_DOM_CONTENT_LOADED_END_MS .
## TIME_TO_NON_BLANK_PAINT_MS
## locale_enUS ***
## locale_enGB
## distro_id_norm_Mozilla
## FX_PAGE_LOAD_MS_2_PARENT ***
## session_length ***
## timezone_cat_4_6 ***
## timezone_cat_0_2 ***
## TIME_TO_DOM_INTERACTIVE_MS **
## TIME_TO_DOM_COMPLETE_MS ***
## timezone_cat_m6_m4
## TIME_TO_LOAD_EVENT_END_MS
## country_US ***
## timezone_offset
## is_wow64_True ***
## num_addons ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 70106 on 69999 degrees of freedom
## Residual deviance: 45021 on 69947 degrees of freedom
## AIC: 45127
##
## Number of Fisher Scoring iterations: 12
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## glm(formula = generate_formula(exps[[exp]], label), family = binomial(),
## data = df_4x_sm)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.1297 -0.5250 -0.2522 -0.0357 8.4904
##
## Coefficients: (6 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.207e+01 1.205e+01 -5.150 2.60e-07
## profile_age -8.109e-05 1.807e-05 -4.488 7.18e-06
## cpu_l2_cache_kb_cat_l512 -1.133e-02 8.409e-02 -0.135 0.892837
## timezone_cat_10_12 6.239e-01 1.004e+00 0.622 0.534157
## timezone_cat_12_14 -1.035e+01 1.739e+02 -0.060 0.952532
## cpu_vendor_Intel 3.221e+00 1.024e+00 3.147 0.001650
## cpu_vendor_AMD 3.329e+00 1.025e+00 3.248 0.001162
## timezone_cat_m4_m2 -1.057e+00 4.708e-01 -2.246 0.024680
## cpu_l2_cache_kb_cat_l1024 -2.885e-02 7.233e-02 -0.399 0.689980
## timezone_cat_m10_m8 -2.405e-01 2.516e-01 -0.956 0.339016
## timezone_cat_m12_m10 4.208e-01 2.526e-01 1.666 0.095678
## default_search_engine_other_bundled -1.658e+00 2.701e-01 -6.138 8.36e-10
## timezone_cat_8_10 -2.038e+00 6.573e-01 -3.101 0.001928
## default_search_engine_Bing -1.429e+00 1.112e-01 -12.847 < 2e-16
## cpu_vendor_Other NA NA NA NA
## install_year 3.279e-02 5.950e-03 5.511 3.57e-08
## is_default_browser_True 2.277e-01 2.499e-02 9.114 < 2e-16
## cpu_speed_mhz -1.879e-05 2.130e-05 -0.882 0.377842
## cpu_l2_cache_kb_cat_l256 1.588e-01 9.359e-02 1.696 0.089816
## cpu_l2_cache_kb_cat_g1024 NA NA NA NA
## startup_ms_max 3.338e-07 9.443e-08 3.535 0.000408
## cpu_l2_cache_kb -5.272e-05 2.805e-05 -1.879 0.060207
## memory_mb 4.117e-06 1.869e-06 2.203 0.027565
## default_search_engine_DuckDuckGo -1.013e+00 7.933e-02 -12.768 < 2e-16
## default_search_engine_Yahoo 9.326e+00 9.598e+01 0.097 0.922589
## startup_ms -2.511e-06 6.626e-07 -3.789 0.000151
## timezone_cat_m2_0 -1.170e+00 2.850e-01 -4.106 4.02e-05
## num_bookmarks 2.939e-05 1.240e-05 2.371 0.017754
## distro_id_norm_Yahoo 9.927e+00 1.264e+00 7.853 4.07e-15
## num_active_days 7.517e-02 6.311e-03 11.912 < 2e-16
## default_search_engine_Google -1.580e+00 4.689e-02 -33.686 < 2e-16
## timezone_cat_m8_m6 1.505e-02 7.466e-02 0.202 0.840212
## cpu_cores 5.915e-02 9.616e-03 6.151 7.69e-10
## distro_id_norm_other 5.194e+00 5.363e-01 9.686 < 2e-16
## default_search_engine_other_nonbundled NA NA NA NA
## sync_configured_True 1.113e+00 6.597e-02 16.864 < 2e-16
## distro_id_norm_acer 7.409e+00 6.499e-01 11.400 < 2e-16
## fxa_configured_True -6.675e-02 6.648e-02 -1.004 0.315342
## timezone_cat_2_4 -1.913e+00 2.909e-01 -6.575 4.86e-11
## session_length_max 6.184e-04 1.079e-03 0.573 0.566596
## timezone_cat_6_8 -1.410e+00 3.707e-01 -3.803 0.000143
## daily_num_sessions_started 2.757e-02 1.086e-02 2.539 0.011109
## TIME_TO_DOM_CONTENT_LOADED_END_MS -1.231e-05 7.330e-06 -1.680 0.093011
## daily_num_sessions_started_max 3.230e-03 6.190e-03 0.522 0.601824
## locale_enUS -2.307e+00 5.981e-02 -38.568 < 2e-16
## locale_enGB NA NA NA NA
## TIME_TO_NON_BLANK_PAINT_MS -1.085e-06 1.249e-05 -0.087 0.930822
## distro_id_norm_Mozilla NA NA NA NA
## session_length -1.309e-02 2.313e-03 -5.662 1.50e-08
## FX_PAGE_LOAD_MS_2_PARENT 4.999e-05 1.351e-05 3.701 0.000215
## timezone_cat_4_6 -1.856e+00 2.821e-01 -6.580 4.72e-11
## timezone_cat_0_2 -1.350e+00 1.911e-01 -7.064 1.61e-12
## TIME_TO_DOM_INTERACTIVE_MS 5.833e-05 1.898e-05 3.073 0.002117
## TIME_TO_DOM_COMPLETE_MS -7.356e-05 2.102e-05 -3.500 0.000466
## timezone_cat_m6_m4 NA NA NA NA
## TIME_TO_LOAD_EVENT_END_MS -5.623e-06 2.210e-05 -0.254 0.799168
## country_US 5.707e-01 1.064e-01 5.365 8.09e-08
## timezone_offset 1.864e-04 4.669e-04 0.399 0.689674
## is_wow64_True -1.927e+00 4.494e-02 -42.883 < 2e-16
## num_addons -8.906e-01 1.032e-02 -86.285 < 2e-16
##
## (Intercept) ***
## profile_age ***
## cpu_l2_cache_kb_cat_l512
## timezone_cat_10_12
## timezone_cat_12_14
## cpu_vendor_Intel **
## cpu_vendor_AMD **
## timezone_cat_m4_m2 *
## cpu_l2_cache_kb_cat_l1024
## timezone_cat_m10_m8
## timezone_cat_m12_m10 .
## default_search_engine_other_bundled ***
## timezone_cat_8_10 **
## default_search_engine_Bing ***
## cpu_vendor_Other
## install_year ***
## is_default_browser_True ***
## cpu_speed_mhz
## cpu_l2_cache_kb_cat_l256 .
## cpu_l2_cache_kb_cat_g1024
## startup_ms_max ***
## cpu_l2_cache_kb .
## memory_mb *
## default_search_engine_DuckDuckGo ***
## default_search_engine_Yahoo
## startup_ms ***
## timezone_cat_m2_0 ***
## num_bookmarks *
## distro_id_norm_Yahoo ***
## num_active_days ***
## default_search_engine_Google ***
## timezone_cat_m8_m6
## cpu_cores ***
## distro_id_norm_other ***
## default_search_engine_other_nonbundled
## sync_configured_True ***
## distro_id_norm_acer ***
## fxa_configured_True
## timezone_cat_2_4 ***
## session_length_max
## timezone_cat_6_8 ***
## daily_num_sessions_started *
## TIME_TO_DOM_CONTENT_LOADED_END_MS .
## daily_num_sessions_started_max
## locale_enUS ***
## locale_enGB
## TIME_TO_NON_BLANK_PAINT_MS
## distro_id_norm_Mozilla
## session_length ***
## FX_PAGE_LOAD_MS_2_PARENT ***
## timezone_cat_4_6 ***
## timezone_cat_0_2 ***
## TIME_TO_DOM_INTERACTIVE_MS **
## TIME_TO_DOM_COMPLETE_MS ***
## timezone_cat_m6_m4
## TIME_TO_LOAD_EVENT_END_MS
## country_US ***
## timezone_offset
## is_wow64_True ***
## num_addons ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 70106 on 69999 degrees of freedom
## Residual deviance: 45020 on 69946 degrees of freedom
## AIC: 45128
##
## Number of Fisher Scoring iterations: 12
# Before-matching propensity-score histograms: one back-to-back panel per
# model in ps_4x, laid out on a 2 x 3 grid.
par(mfrow = c(2, 3))
for (ps in ps_4x) {
  # Fitted probabilities of the current model. The column is overwritten on
  # every pass, so after the loop it holds the scores of the last model only.
  df_4x_sm$psvalue <- predict(ps, type = "response")
  # split() orders the groups by label_release value (0 first).
  # NOTE(review): confirm that this order matches xlab = c("release", "beta").
  out <- histbackback(
    split(df_4x_sm$psvalue, df_4x_sm$label_release),
    main = "Propensity score before matching",
    xlab = c("release", "beta")
  )
  # Overdraw both halves of the histogram to add colour.
  barplot(-out$left, col = "#111d5e", horiz = TRUE, space = 0,
          add = TRUE, axes = FALSE)
  barplot(out$right, col = "#b21f66", horiz = TRUE, space = 0,
          add = TRUE, axes = FALSE)
}
cem_results_1x <- list()
# CEM matching on the 1x sample: one matchit() run per experiment in `exps`.
# The fitted object and its matched data are stored under the experiment name.
cem_models_1x <- list()
for (exp in names(exps)) {
  cem <- matchit(
    formula = generate_formula(exps[[exp]], label),
    data = df_1x_sm,
    method = "cem"
  )
  cem_models_1x[[exp]] <- cem
  res <- match.data(cem)
  cem_results_1x[[exp]] <- res
  # print(summary(cem))
}
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Using 'treat'='1' as baseline group
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Using 'treat'='1' as baseline group
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Using 'treat'='1' as baseline group
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Using 'treat'='1' as baseline group
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Using 'treat'='1' as baseline group
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Using 'treat'='1' as baseline group
# CEM matching on the 2x sample: one matchit() run per experiment in `exps`.
# The fitted object and its matched data are stored under the experiment name.
cem_results_2x <- list()
cem_models_2x <- list()
for (exp in names(exps)) {
  cem <- matchit(
    formula = generate_formula(exps[[exp]], label),
    data = df_2x_sm,
    method = "cem"
  )
  cem_models_2x[[exp]] <- cem
  res <- match.data(cem)
  cem_results_2x[[exp]] <- res
  # print(summary(cem))
}
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Using 'treat'='1' as baseline group
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Using 'treat'='1' as baseline group
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Using 'treat'='1' as baseline group
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Using 'treat'='1' as baseline group
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Using 'treat'='1' as baseline group
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Using 'treat'='1' as baseline group
# CEM matching on the 4x sample: one matchit() run per experiment in `exps`.
# The fitted object and its matched data are stored under the experiment name.
cem_results_4x <- list()
cem_models_4x <- list()
for (exp in names(exps)) {
  cem <- matchit(
    formula = generate_formula(exps[[exp]], label),
    data = df_4x_sm,
    method = "cem"
  )
  cem_models_4x[[exp]] <- cem
  res <- match.data(cem)
  cem_results_4x[[exp]] <- res
  # print(summary(cem))
}
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Using 'treat'='1' as baseline group
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Using 'treat'='1' as baseline group
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Using 'treat'='1' as baseline group
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Using 'treat'='1' as baseline group
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Using 'treat'='1' as baseline group
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Using 'treat'='1' as baseline group
# Nearest-neighbour matching with replacement on the 1x sample: one matchit()
# run per experiment in `exps`. The fitted object and its matched data are
# stored under the experiment name.
nn_results_1x <- list()
nn_models_1x <- list()
for (exp in names(exps)) {
  nn <- matchit(
    formula = generate_formula(exps[[exp]], label),
    data = df_1x_sm,
    method = "nearest",
    replace = TRUE
  )
  nn_models_1x[[exp]] <- nn
  res <- match.data(nn)
  nn_results_1x[[exp]] <- res
  # print(summary(nn))
}
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Nearest-neighbour matching with replacement on the 2x sample: one matchit()
# run per experiment in `exps`. The fitted object and its matched data are
# stored under the experiment name.
nn_results_2x <- list()
nn_models_2x <- list()
for (exp in names(exps)) {
  nn <- matchit(
    formula = generate_formula(exps[[exp]], label),
    data = df_2x_sm,
    method = "nearest",
    replace = TRUE
  )
  nn_models_2x[[exp]] <- nn
  res <- match.data(nn)
  nn_results_2x[[exp]] <- res
  # print(summary(nn))
}
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Nearest-neighbour matching with replacement on the 4x sample: one matchit()
# run per experiment in `exps`. The fitted object and its matched data are
# stored under the experiment name.
nn_results_4x <- list()
nn_models_4x <- list()
for (exp in names(exps)) {
  nn <- matchit(
    formula = generate_formula(exps[[exp]], label),
    data = df_4x_sm,
    method = "nearest",
    replace = TRUE
  )
  nn_models_4x[[exp]] <- nn
  res <- match.data(nn)
  nn_results_4x[[exp]] <- res
  # print(summary(nn))
}
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Histogram diagnostic (plot(..., type = "hist")) for every CEM model of the
# 1x sample, announced by its running experiment number.
counter <- 0
par(mfrow = c(2, 2))
for (m.cem in cem_models_1x) {
  counter <- counter + 1
  # paste() already separates its arguments with a single space.
  print(paste("Experiment", counter))
  plot(m.cem, type = "hist", col = "#111d5e")
}
## [1] "Experiment 1"
## [1] "Experiment 2"
## [1] "Experiment 3"
## [1] "Experiment 4"
## [1] "Experiment 5"
## [1] "Experiment 6"
From the results above, we can see that the model using Nearest Neighbor matching with balanced sampling outperforms the other models.
Furthermore, experiments \(3\) and \(4\) (referring to the results of the feature selection with the stratified sampling, considering the top 5 and top 10, respectively) were the ones that presented the best results for most of the tested models.
Control unit quantile values are plotted on the x-axis, and treated unit quantile values are plotted on the y-axis. If values fall below the 45 degree line, control units generally take lower values of the covariate.
Data points that fall exactly on the 45 degree line indicate that the marginal distributions are identical, that is, the empirical distributions are the same in the treated and control groups.
Deviations from the 45 degree line indicate differences in the empirical distribution. Therefore, despite the promising results of the previous analysis, we still have some variables with different distributions after matching.
### 1. Standardized difference
# Absolute standardized mean difference (in percent) of every experiment-3
# covariate between the rows with label_beta == 1 and the remaining rows of
# the nearest-neighbour matched 1x sample, sorted ascending.
match.data_3 <- match.data(nn_models_1x$exp_3)
treated1 <- match.data_3$label_beta == 1
# Index with the character vector directly so the names stored in exp_3 are
# used unambiguously (a bare vector inside select() can clash with a column).
cov1 <- match.data_3[, exp_3, drop = FALSE]
# vapply() works column by column, avoiding the matrix coercion apply() does.
# NOTE(review): nn_models_1x was matched with replace = TRUE, so the matched
# rows presumably carry weights; these means and variances are unweighted.
# Confirm that this is intended.
std.diff1 <- vapply(
  cov1,
  function(x) {
    pooled_sd <- sqrt(0.5 * (var(x[treated1]) + var(x[!treated1])))
    100 * (mean(x[treated1]) - mean(x[!treated1])) / pooled_sd
  },
  numeric(1)
)
sort(abs(std.diff1))
## num_active_days profile_age
## 0.7592789 0.7870619
## memory_mb daily_num_sessions_started_max
## 1.4746968 4.8306573
## daily_num_sessions_started num_bookmarks
## 5.0926030 6.6984607
## session_length_max TIME_TO_NON_BLANK_PAINT_MS
## 8.7393035 11.5737672
## TIME_TO_DOM_CONTENT_LOADED_END_MS FX_PAGE_LOAD_MS_2_PARENT
## 11.7824340 12.2450240
## session_length TIME_TO_DOM_INTERACTIVE_MS
## 12.9838242 17.2822520
## TIME_TO_DOM_COMPLETE_MS TIME_TO_LOAD_EVENT_END_MS
## 17.3060499 18.8906230
## num_addons
## 61.6702126
# Side-by-side back-to-back histograms of the experiment-3 propensity scores,
# before and after nearest-neighbour matching.
par(mfrow = c(1, 2))
# before matching
df_1x_sm$psvalue <- predict(ps_1x$exp_3, type = "response")
out <- histbackback(
  split(df_1x_sm$psvalue, df_1x_sm$label_release),
  main = "Propensity score before matching",
  xlab = c("release", "beta")
)
# just adding color
barplot(-out$left, col = "#111d5e", horiz = TRUE, space = 0,
        add = TRUE, axes = FALSE)
barplot(out$right, col = "#b21f66", horiz = TRUE, space = 0,
        add = TRUE, axes = FALSE)
# after matching
# Score the matched rows with the experiment-3 model explicitly: match.data_3
# was built before df_1x_sm$psvalue was refreshed above, so its own psvalue
# column is not guaranteed to hold experiment-3 scores.
ps_after_3 <- predict(ps_1x$exp_3, newdata = match.data_3, type = "response")
out <- histbackback(
  split(ps_after_3, match.data_3$label_release),
  main = "Propensity score after matching (NN)",
  xlab = c("release", "beta")
)
# just adding color
barplot(-out$left, col = "#111d5e", horiz = TRUE, space = 0,
        add = TRUE, axes = FALSE)
barplot(out$right, col = "#b21f66", horiz = TRUE, space = 0,
        add = TRUE, axes = FALSE)
### 1. Standardized difference
# Absolute standardized mean difference (in percent) of every experiment-4
# covariate between the rows with label_beta == 1 and the remaining rows of
# the nearest-neighbour matched 1x sample, sorted ascending.
match.data_4 <- match.data(nn_models_1x$exp_4)
treated1 <- match.data_4$label_beta == 1
# Index with the character vector directly so the names stored in exp_4 are
# used unambiguously (a bare vector inside select() can clash with a column).
cov1 <- match.data_4[, exp_4, drop = FALSE]
# vapply() works column by column, avoiding the matrix coercion apply() does.
# NOTE(review): nn_models_1x was matched with replace = TRUE, so the matched
# rows presumably carry weights; these means and variances are unweighted.
# Confirm that this is intended.
std.diff1 <- vapply(
  cov1,
  function(x) {
    pooled_sd <- sqrt(0.5 * (var(x[treated1]) + var(x[!treated1])))
    100 * (mean(x[treated1]) - mean(x[!treated1])) / pooled_sd
  },
  numeric(1)
)
sort(abs(std.diff1))
## cpu_speed_mhz profile_age
## 0.422552 1.652182
## num_active_days default_search_engine_other_nonbundled
## 1.663933 2.084412
## memory_mb startup_ms_max
## 2.482622 4.312691
## startup_ms daily_num_sessions_started
## 5.481087 5.664462
## daily_num_sessions_started_max num_bookmarks
## 5.880259 7.030220
## session_length_max TIME_TO_DOM_CONTENT_LOADED_END_MS
## 9.428856 11.103165
## TIME_TO_NON_BLANK_PAINT_MS FX_PAGE_LOAD_MS_2_PARENT
## 12.903814 13.002187
## session_length TIME_TO_DOM_COMPLETE_MS
## 13.490112 17.705496
## TIME_TO_DOM_INTERACTIVE_MS TIME_TO_LOAD_EVENT_END_MS
## 17.975224 19.598032
## num_addons
## 62.570945
# Side-by-side back-to-back histograms of the experiment-4 propensity scores,
# before and after nearest-neighbour matching.
par(mfrow = c(1, 2))
# before matching
df_1x_sm$psvalue <- predict(ps_1x$exp_4, type = "response")
out <- histbackback(
  split(df_1x_sm$psvalue, df_1x_sm$label_release),
  main = "Propensity score before matching",
  xlab = c("release", "beta")
)
# just adding color
barplot(-out$left, col = "#111d5e", horiz = TRUE, space = 0,
        add = TRUE, axes = FALSE)
barplot(out$right, col = "#b21f66", horiz = TRUE, space = 0,
        add = TRUE, axes = FALSE)
# after matching
# Score the matched rows with the experiment-4 model explicitly: match.data_4
# was created before df_1x_sm$psvalue was overwritten with the experiment-4
# scores above, so its own psvalue column does not hold them.
ps_after_4 <- predict(ps_1x$exp_4, newdata = match.data_4, type = "response")
out <- histbackback(
  split(ps_after_4, match.data_4$label_release),
  main = "Propensity score after matching (NN)",
  xlab = c("release", "beta")
)
# just adding color
barplot(-out$left, col = "#111d5e", horiz = TRUE, space = 0,
        add = TRUE, axes = FALSE)
barplot(out$right, col="#b21f66", horiz=TRUE, space=0, add=TRUE, axes=FALSE)
As can be seen from the figures above, there is a notable improvement in the match between the two distributions of propensity scores after matching (compared with the results before matching). This match suggests that the two groups (beta and release) are much more similar in terms of their propensity scores and, therefore, that the selection bias has been reduced substantially.
Comparing the two experiments, we can see that Experiment 3 showed better results. We confirm this below:
# Experiment 3
# Per-group covariate summary (mean and SD, stratified by label_beta) with
# standardized mean differences for the experiment-3 matched data.
# NOTE(review): match.data_3 comes from matching with replace = TRUE; this
# table does not use the matching weights. Confirm that this is intended.
table_match_3 <- CreateTableOne(
  vars = exp_3,
  strata = "label_beta",
  data = match.data_3,
  test = FALSE,
  smd = TRUE
)
# printToggle = FALSE returns the formatted matrix without also dumping the
# raw table to the console; only the styled kable is meant to be shown.
kable(print(table_match_3, smd = TRUE, printToggle = FALSE)) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE
  ) %>%
  scroll_box(width = "100%")
## Stratified by label_beta
## 0
## n 34973
## daily_num_sessions_started (mean (SD)) 2.86 (2.89)
## daily_num_sessions_started_max (mean (SD)) 5.20 (5.18)
## FX_PAGE_LOAD_MS_2_PARENT (mean (SD)) 3044.14 (1583.70)
## memory_mb (mean (SD)) 9375.50 (8260.04)
## num_active_days (mean (SD)) 5.56 (2.07)
## num_addons (mean (SD)) 5.67 (2.22)
## num_bookmarks (mean (SD)) 164.73 (714.23)
## profile_age (mean (SD)) 899.24 (769.36)
## session_length (mean (SD)) 9.21 (9.49)
## session_length_max (mean (SD)) 18.15 (19.99)
## TIME_TO_DOM_COMPLETE_MS (mean (SD)) 3304.50 (2697.53)
## TIME_TO_DOM_CONTENT_LOADED_END_MS (mean (SD)) 2291.19 (2217.80)
## TIME_TO_DOM_INTERACTIVE_MS (mean (SD)) 1797.96 (1487.42)
## TIME_TO_LOAD_EVENT_END_MS (mean (SD)) 3026.73 (2435.59)
## TIME_TO_NON_BLANK_PAINT_MS (mean (SD)) 1457.44 (1493.90)
## Stratified by label_beta
## 1 SMD
## n 12491
## daily_num_sessions_started (mean (SD)) 2.71 (3.14) 0.051
## daily_num_sessions_started_max (mean (SD)) 4.94 (5.58) 0.048
## FX_PAGE_LOAD_MS_2_PARENT (mean (SD)) 3252.46 (1811.27) 0.122
## memory_mb (mean (SD)) 9503.63 (9097.65) 0.015
## num_active_days (mean (SD)) 5.55 (2.21) 0.008
## num_addons (mean (SD)) 7.14 (2.54) 0.617
## num_bookmarks (mean (SD)) 243.61 (1504.37) 0.067
## profile_age (mean (SD)) 905.30 (769.50) 0.008
## session_length (mean (SD)) 10.66 (12.69) 0.130
## session_length_max (mean (SD)) 20.40 (30.49) 0.087
## TIME_TO_DOM_COMPLETE_MS (mean (SD)) 3866.19 (3713.67) 0.173
## TIME_TO_DOM_CONTENT_LOADED_END_MS (mean (SD)) 2588.08 (2789.29) 0.118
## TIME_TO_DOM_INTERACTIVE_MS (mean (SD)) 2115.94 (2134.99) 0.173
## TIME_TO_LOAD_EVENT_END_MS (mean (SD)) 3590.46 (3446.53) 0.189
## TIME_TO_NON_BLANK_PAINT_MS (mean (SD)) 1661.02 (1989.09) 0.116
| 0 | 1 | SMD | |
|---|---|---|---|
| n | 34973 | 12491 | |
| daily_num_sessions_started (mean (SD)) | 2.86 (2.89) | 2.71 (3.14) | 0.051 |
| daily_num_sessions_started_max (mean (SD)) | 5.20 (5.18) | 4.94 (5.58) | 0.048 |
| FX_PAGE_LOAD_MS_2_PARENT (mean (SD)) | 3044.14 (1583.70) | 3252.46 (1811.27) | 0.122 |
| memory_mb (mean (SD)) | 9375.50 (8260.04) | 9503.63 (9097.65) | 0.015 |
| num_active_days (mean (SD)) | 5.56 (2.07) | 5.55 (2.21) | 0.008 |
| num_addons (mean (SD)) | 5.67 (2.22) | 7.14 (2.54) | 0.617 |
| num_bookmarks (mean (SD)) | 164.73 (714.23) | 243.61 (1504.37) | 0.067 |
| profile_age (mean (SD)) | 899.24 (769.36) | 905.30 (769.50) | 0.008 |
| session_length (mean (SD)) | 9.21 (9.49) | 10.66 (12.69) | 0.130 |
| session_length_max (mean (SD)) | 18.15 (19.99) | 20.40 (30.49) | 0.087 |
| TIME_TO_DOM_COMPLETE_MS (mean (SD)) | 3304.50 (2697.53) | 3866.19 (3713.67) | 0.173 |
| TIME_TO_DOM_CONTENT_LOADED_END_MS (mean (SD)) | 2291.19 (2217.80) | 2588.08 (2789.29) | 0.118 |
| TIME_TO_DOM_INTERACTIVE_MS (mean (SD)) | 1797.96 (1487.42) | 2115.94 (2134.99) | 0.173 |
| TIME_TO_LOAD_EVENT_END_MS (mean (SD)) | 3026.73 (2435.59) | 3590.46 (3446.53) | 0.189 |
| TIME_TO_NON_BLANK_PAINT_MS (mean (SD)) | 1457.44 (1493.90) | 1661.02 (1989.09) | 0.116 |
# Experiment 4
# Per-group covariate summary (mean and SD, stratified by label_beta) with
# standardized mean differences for the experiment-4 matched data.
# NOTE(review): match.data_4 comes from matching with replace = TRUE; this
# table does not use the matching weights. Confirm that this is intended.
table_match <- CreateTableOne(
  vars = exp_4,
  strata = "label_beta",
  data = match.data_4,
  test = FALSE,
  smd = TRUE
)
# printToggle = FALSE returns the formatted matrix without also dumping the
# raw table to the console; only the styled kable is meant to be shown.
kable(print(table_match, smd = TRUE, printToggle = FALSE)) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE
  ) %>%
  scroll_box(width = "100%")
## Stratified by label_beta
## 0
## n 34973
## cpu_speed_mhz (mean (SD)) 2705.12 (633.84)
## daily_num_sessions_started (mean (SD)) 2.86 (2.89)
## daily_num_sessions_started_max (mean (SD)) 5.20 (5.18)
## default_search_engine_other_nonbundled (mean (SD)) 0.11 (0.31)
## FX_PAGE_LOAD_MS_2_PARENT (mean (SD)) 3044.14 (1583.70)
## memory_mb (mean (SD)) 9375.50 (8260.04)
## num_active_days (mean (SD)) 5.56 (2.07)
## num_addons (mean (SD)) 5.67 (2.22)
## num_bookmarks (mean (SD)) 164.73 (714.23)
## profile_age (mean (SD)) 899.24 (769.36)
## session_length (mean (SD)) 9.21 (9.49)
## session_length_max (mean (SD)) 18.15 (19.99)
## startup_ms (mean (SD)) 9754.55 (70572.92)
## startup_ms_max (mean (SD)) 24170.19 (332125.03)
## TIME_TO_DOM_COMPLETE_MS (mean (SD)) 3304.50 (2697.53)
## TIME_TO_DOM_CONTENT_LOADED_END_MS (mean (SD)) 2291.19 (2217.80)
## TIME_TO_DOM_INTERACTIVE_MS (mean (SD)) 1797.96 (1487.42)
## TIME_TO_LOAD_EVENT_END_MS (mean (SD)) 3026.73 (2435.59)
## TIME_TO_NON_BLANK_PAINT_MS (mean (SD)) 1457.44 (1493.90)
## Stratified by label_beta
## 1
## n 11970
## cpu_speed_mhz (mean (SD)) 2702.26 (717.96)
## daily_num_sessions_started (mean (SD)) 2.69 (3.10)
## daily_num_sessions_started_max (mean (SD)) 4.89 (5.50)
## default_search_engine_other_nonbundled (mean (SD)) 0.10 (0.30)
## FX_PAGE_LOAD_MS_2_PARENT (mean (SD)) 3265.91 (1819.38)
## memory_mb (mean (SD)) 9600.61 (9808.84)
## num_active_days (mean (SD)) 5.53 (2.19)
## num_addons (mean (SD)) 7.12 (2.42)
## num_bookmarks (mean (SD)) 246.35 (1478.42)
## profile_age (mean (SD)) 911.97 (772.08)
## session_length (mean (SD)) 10.73 (12.84)
## session_length_max (mean (SD)) 20.51 (29.26)
## startup_ms (mean (SD)) 16849.28 (168905.46)
## startup_ms_max (mean (SD)) 55123.72 (959150.05)
## TIME_TO_DOM_COMPLETE_MS (mean (SD)) 3875.17 (3674.32)
## TIME_TO_DOM_CONTENT_LOADED_END_MS (mean (SD)) 2562.44 (2649.21)
## TIME_TO_DOM_INTERACTIVE_MS (mean (SD)) 2123.82 (2088.19)
## TIME_TO_LOAD_EVENT_END_MS (mean (SD)) 3606.54 (3401.97)
## TIME_TO_NON_BLANK_PAINT_MS (mean (SD)) 1677.33 (1891.03)
## Stratified by label_beta
## SMD
## n
## cpu_speed_mhz (mean (SD)) 0.004
## daily_num_sessions_started (mean (SD)) 0.057
## daily_num_sessions_started_max (mean (SD)) 0.059
## default_search_engine_other_nonbundled (mean (SD)) 0.021
## FX_PAGE_LOAD_MS_2_PARENT (mean (SD)) 0.130
## memory_mb (mean (SD)) 0.025
## num_active_days (mean (SD)) 0.017
## num_addons (mean (SD)) 0.626
## num_bookmarks (mean (SD)) 0.070
## profile_age (mean (SD)) 0.017
## session_length (mean (SD)) 0.135
## session_length_max (mean (SD)) 0.094
## startup_ms (mean (SD)) 0.055
## startup_ms_max (mean (SD)) 0.043
## TIME_TO_DOM_COMPLETE_MS (mean (SD)) 0.177
## TIME_TO_DOM_CONTENT_LOADED_END_MS (mean (SD)) 0.111
## TIME_TO_DOM_INTERACTIVE_MS (mean (SD)) 0.180
## TIME_TO_LOAD_EVENT_END_MS (mean (SD)) 0.196
## TIME_TO_NON_BLANK_PAINT_MS (mean (SD)) 0.129
| 0 | 1 | SMD | |
|---|---|---|---|
| n | 34973 | 11970 | |
| cpu_speed_mhz (mean (SD)) | 2705.12 (633.84) | 2702.26 (717.96) | 0.004 |
| daily_num_sessions_started (mean (SD)) | 2.86 (2.89) | 2.69 (3.10) | 0.057 |
| daily_num_sessions_started_max (mean (SD)) | 5.20 (5.18) | 4.89 (5.50) | 0.059 |
| default_search_engine_other_nonbundled (mean (SD)) | 0.11 (0.31) | 0.10 (0.30) | 0.021 |
| FX_PAGE_LOAD_MS_2_PARENT (mean (SD)) | 3044.14 (1583.70) | 3265.91 (1819.38) | 0.130 |
| memory_mb (mean (SD)) | 9375.50 (8260.04) | 9600.61 (9808.84) | 0.025 |
| num_active_days (mean (SD)) | 5.56 (2.07) | 5.53 (2.19) | 0.017 |
| num_addons (mean (SD)) | 5.67 (2.22) | 7.12 (2.42) | 0.626 |
| num_bookmarks (mean (SD)) | 164.73 (714.23) | 246.35 (1478.42) | 0.070 |
| profile_age (mean (SD)) | 899.24 (769.36) | 911.97 (772.08) | 0.017 |
| session_length (mean (SD)) | 9.21 (9.49) | 10.73 (12.84) | 0.135 |
| session_length_max (mean (SD)) | 18.15 (19.99) | 20.51 (29.26) | 0.094 |
| startup_ms (mean (SD)) | 9754.55 (70572.92) | 16849.28 (168905.46) | 0.055 |
| startup_ms_max (mean (SD)) | 24170.19 (332125.03) | 55123.72 (959150.05) | 0.043 |
| TIME_TO_DOM_COMPLETE_MS (mean (SD)) | 3304.50 (2697.53) | 3875.17 (3674.32) | 0.177 |
| TIME_TO_DOM_CONTENT_LOADED_END_MS (mean (SD)) | 2291.19 (2217.80) | 2562.44 (2649.21) | 0.111 |
| TIME_TO_DOM_INTERACTIVE_MS (mean (SD)) | 1797.96 (1487.42) | 2123.82 (2088.19) | 0.180 |
| TIME_TO_LOAD_EVENT_END_MS (mean (SD)) | 3026.73 (2435.59) | 3606.54 (3401.97) | 0.196 |
| TIME_TO_NON_BLANK_PAINT_MS (mean (SD)) | 1457.44 (1493.90) | 1677.33 (1891.03) | 0.129 |
##
## Call:
## matchit(formula = generate_formula(exps[[exp]], label), data = df_1x_sm,
## method = "nearest", replace = TRUE)
##
## Summary of balance for all data:
## Means Treated Means Control SD Control
## distance 0.6201 0.3793 0.2011
## daily_num_sessions_started 2.8624 2.3903 2.7452
## daily_num_sessions_started_max 5.1994 4.3137 4.8323
## FX_PAGE_LOAD_MS_2_PARENT 3044.1381 3459.4753 1918.2850
## memory_mb 9375.4970 8954.2914 7959.4119
## num_active_days 5.5615 5.3423 2.2659
## num_addons 5.6684 7.8676 3.3668
## num_bookmarks 164.7282 244.0887 1285.4479
## profile_age 899.2399 893.4752 761.9573
## session_length 9.2079 12.2015 14.4045
## session_length_max 18.1451 22.5676 29.8243
## TIME_TO_DOM_COMPLETE_MS 3304.4993 4397.6110 4283.8936
## TIME_TO_DOM_CONTENT_LOADED_END_MS 2291.1852 2743.6285 2737.7315
## TIME_TO_DOM_INTERACTIVE_MS 1797.9580 2411.6625 2431.1910
## TIME_TO_LOAD_EVENT_END_MS 3026.7281 4134.4538 4026.6727
## TIME_TO_NON_BLANK_PAINT_MS 1457.4383 1837.6274 2148.1641
## Mean Diff eQQ Med eQQ Mean eQQ Max
## distance 0.2408 0.2500 0.2408 0.3106
## daily_num_sessions_started 0.4721 0.4000 0.4737 1.5750
## daily_num_sessions_started_max 0.8857 1.0000 0.8882 3.0000
## FX_PAGE_LOAD_MS_2_PARENT -415.3372 286.3527 415.2519 1217.0293
## memory_mb 421.2056 31.0000 542.1348 196547.0000
## num_active_days 0.2192 0.0000 0.2374 1.0000
## num_addons -2.1992 2.0000 2.1994 126.6667
## num_bookmarks -79.3605 1.0000 79.1581 21769.0000
## profile_age 5.7647 23.0000 25.2873 1369.0000
## session_length -2.9936 1.4564 2.9903 151.2126
## session_length_max -4.4224 2.8661 4.4036 858.3503
## TIME_TO_DOM_COMPLETE_MS -1093.1117 408.7792 1092.1439 11538.8571
## TIME_TO_DOM_CONTENT_LOADED_END_MS -452.4433 218.3252 451.4889 12439.4915
## TIME_TO_DOM_INTERACTIVE_MS -613.7044 236.0083 612.6128 20607.0000
## TIME_TO_LOAD_EVENT_END_MS -1107.7257 428.3251 1106.7063 11196.2118
## TIME_TO_NON_BLANK_PAINT_MS -380.1891 151.5375 379.0529 16102.5273
##
##
## Summary of balance for matched data:
## Means Treated Means Control SD Control
## distance 0.6201 0.6201 0.1995
## daily_num_sessions_started 2.8624 4.0732 4.9384
## daily_num_sessions_started_max 5.1994 7.6838 9.8565
## FX_PAGE_LOAD_MS_2_PARENT 3044.1381 2912.7561 1622.9078
## memory_mb 9375.4970 14598.4850 18130.7604
## num_active_days 5.5615 5.9370 2.0637
## num_addons 5.6684 6.2446 1.9905
## num_bookmarks 164.7282 851.4996 3905.5180
## profile_age 899.2399 949.5171 809.2864
## session_length 9.2079 8.1689 10.8524
## session_length_max 18.1451 17.1514 39.7136
## TIME_TO_DOM_COMPLETE_MS 3304.4993 3195.2355 3352.6550
## TIME_TO_DOM_CONTENT_LOADED_END_MS 2291.1852 2805.0239 3838.0612
## TIME_TO_DOM_INTERACTIVE_MS 1797.9580 1726.9272 1763.8950
## TIME_TO_LOAD_EVENT_END_MS 3026.7281 2854.4041 2673.1022
## TIME_TO_NON_BLANK_PAINT_MS 1457.4383 1397.7523 1622.3912
## Mean Diff eQQ Med eQQ Mean eQQ Max
## distance 0.0000 0.1645 0.1589 0.2015
## daily_num_sessions_started -1.2108 0.2321 0.2508 2.8750
## daily_num_sessions_started_max -2.4844 0.0000 0.4174 9.0000
## FX_PAGE_LOAD_MS_2_PARENT 131.3820 119.0923 208.8125 912.0536
## memory_mb -5222.9880 29.0000 727.7257 196581.0000
## num_active_days -0.3755 0.0000 0.1748 1.0000
## num_addons -0.5762 1.5000 1.4821 11.0000
## num_bookmarks -686.7714 2.0000 81.0448 21802.0000
## profile_age -50.2772 13.0000 19.4053 1597.0000
## session_length 1.0390 0.2017 1.4697 144.9158
## session_length_max 0.9938 0.6333 2.3059 858.3503
## TIME_TO_DOM_COMPLETE_MS 109.2638 147.9601 562.4198 11171.1250
## TIME_TO_DOM_CONTENT_LOADED_END_MS -513.8387 88.5859 298.0847 12439.4915
## TIME_TO_DOM_INTERACTIVE_MS 71.0308 78.5130 318.7461 20607.0000
## TIME_TO_LOAD_EVENT_END_MS 172.3240 147.1974 563.7810 7937.8281
## TIME_TO_NON_BLANK_PAINT_MS 59.6860 56.2857 202.8511 9919.0000
##
## Percent Balance Improvement:
## Mean Diff. eQQ Med eQQ Mean eQQ Max
## distance 99.9987 34.2278 34.0267 35.1400
## daily_num_sessions_started -156.4632 41.9643 47.0445 -82.5397
## daily_num_sessions_started_max -180.5042 100.0000 53.0022 -200.0000
## FX_PAGE_LOAD_MS_2_PARENT 68.3674 58.4106 49.7142 25.0590
## memory_mb -1140.0092 6.4516 -34.2334 -0.0173
## num_active_days -71.2910 0.0000 26.3693 0.0000
## num_addons 73.7992 25.0000 32.6121 91.3158
## num_bookmarks -765.3816 -100.0000 -2.3834 -0.1516
## profile_age -772.1575 43.4783 23.2609 -16.6545
## session_length 65.2940 86.1510 50.8513 4.1642
## session_length_max 77.5289 77.9027 47.6370 0.0000
## TIME_TO_DOM_COMPLETE_MS 90.0043 63.8044 48.5031 3.1869
## TIME_TO_DOM_CONTENT_LOADED_END_MS -13.5698 59.4248 33.9774 0.0000
## TIME_TO_DOM_INTERACTIVE_MS 88.4259 66.7330 47.9694 0.0000
## TIME_TO_LOAD_EVENT_END_MS 84.4434 65.6342 49.0578 29.1026
## TIME_TO_NON_BLANK_PAINT_MS 84.3010 62.8569 46.4848 38.4010
##
## Sample sizes:
## Control Treated
## All 35027 34973
## Matched 12491 34973
## Unmatched 22536 0
## Discarded 0 0